Index: tools/auto_bisect/math_utils.py |
diff --git a/tools/auto_bisect/math_utils.py b/tools/auto_bisect/math_utils.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..fe94f53583bd255f7ecb07902ab114f863e4380e |
--- /dev/null |
+++ b/tools/auto_bisect/math_utils.py |
@@ -0,0 +1,130 @@ |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""General statistical or mathematical functions.""" |
+ |
+import math |
+ |
+ |
def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded.

  Data sets with one or two values are never truncated; their plain
  arithmetic mean is returned.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: The fraction to discard from each of the upper and
        lower portions of the data set, expressed as a value in [0, 1].

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty, or was empty after discarding values.
  """
  if len(data_set) > 2:
    data_set = sorted(data_set)

    discard_num_float = len(data_set) * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    kept_weight = len(data_set) - discard_num_float * 2

    # Drop the whole values from each end; a fractional remainder (if any) is
    # handled below by down-weighting the new boundary values.
    data_set = data_set[discard_num_int:len(data_set) - discard_num_int]
    if not data_set:
      raise TypeError('Data set is empty after discarding values.')

    weight_left = 1.0 - (discard_num_float - discard_num_int)

    if weight_left < 1:
      if len(data_set) == 1:
        # The single surviving value is both the low and the high boundary.
        # It carries all of the remaining weight, so it is the mean; it must
        # not be counted once per boundary.
        return float(data_set[0])
      # If the % to discard leaves a fractional portion, need to weight the
      # two boundary values.
      weighted_vals = [data_set[0] * weight_left, data_set[-1] * weight_left]
      data_set = weighted_vals + data_set[1:-1]
  else:
    if not data_set:
      raise TypeError('Data set is empty.')
    kept_weight = len(data_set)

  # sum() is used instead of reduce(), which is not a builtin in Python 3.
  return sum(float(x) for x in data_set) / kept_weight
+ |
+ |
def Mean(values):
  """Returns the arithmetic mean of a list of values.

  This is the truncated mean with nothing discarded from either end.
  """
  nothing_discarded = 0.0
  return TruncatedMean(values, nothing_discarded)
+ |
+ |
def StandardDeviation(values):
  """Calculates the sample standard deviation of the given list of values.

  Args:
    values: A list of numbers.

  Returns:
    The sample standard deviation (using an n - 1 denominator) as a float.
    Returns 0.0 when there are fewer than two values, since no spread can be
    measured from an empty or single-value sample.
  """
  # Guard both the empty and single-value cases: the n - 1 denominator below
  # would otherwise be zero or negative, and the mean of an empty list is
  # undefined.
  if len(values) <= 1:
    return 0.0

  mean = Mean(values)
  differences_from_mean = [float(x) - mean for x in values]
  squared_differences = [d * d for d in differences_from_mean]
  variance = sum(squared_differences) / (len(values) - 1)
  return math.sqrt(variance)
+ |
+ |
def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    Returns 0.0 when the two values are equal, and NaN when |before| is zero
    but |after| is not (the relative change is undefined in that case).
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Convert to float before dividing so that integer arguments are not
  # subject to floor division under Python 2.
  difference = float(after - before)
  return math.fabs(difference / before)
+ |
+ |
def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  Args:
    work_sets: A collection of collections of numbers.

  Returns:
    Pooled sample standard error as a float. Returns 0.0 when the samples
    provide no degrees of freedom (for example, when there are no samples or
    every sample has at most one value). Empty samples are ignored.
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    set_size = len(current_set)
    if set_size == 0:
      # An empty sample carries no information. Skip it entirely so that it
      # neither subtracts a degree of freedom nor reaches StandardDeviation.
      continue
    std_dev = StandardDeviation(current_set)
    numerator += (set_size - 1) * std_dev ** 2
    denominator1 += set_size - 1
    denominator2 += 1.0 / set_size

  if denominator1 == 0:
    return 0.0

  return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
+ |
+ |
+# Redefining built-in 'StandardError' |
+# pylint: disable=W0622 |
def StandardError(values):
  """Returns the standard error of a list of values.

  The result is the sample standard deviation divided by the square root of
  the number of values, or 0.0 when there are fewer than two values.
  """
  sample_count = len(values)
  if sample_count > 1:
    return StandardDeviation(values) / math.sqrt(sample_count)
  return 0.0