| Index: tools/auto_bisect/math_utils.py
|
| diff --git a/tools/auto_bisect/math_utils.py b/tools/auto_bisect/math_utils.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..fe94f53583bd255f7ecb07902ab114f863e4380e
|
| --- /dev/null
|
| +++ b/tools/auto_bisect/math_utils.py
|
| @@ -0,0 +1,130 @@
|
| +# Copyright 2014 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""General statistical or mathematical functions."""
|
| +
|
| +import math
|
| +
|
| +
|
| +def TruncatedMean(data_set, truncate_percent):
|
| + """Calculates the truncated mean of a set of values.
|
| +
|
| + Note that this isn't just the mean of the set of values with the highest
|
| + and lowest values discarded; the non-discarded values are also weighted
|
| + differently depending how many values are discarded.
|
| +
|
| + Args:
|
| + data_set: Non-empty list of values.
|
| + truncate_percent: The % from the upper and lower portions of the data set
|
| + to discard, expressed as a value in [0, 1].
|
| +
|
| + Returns:
|
| + The truncated mean as a float.
|
| +
|
| + Raises:
|
| + TypeError: The data set was empty after discarding values.
|
| + """
|
| + if len(data_set) > 2:
|
| + data_set = sorted(data_set)
|
| +
|
| + discard_num_float = len(data_set) * truncate_percent
|
| + discard_num_int = int(math.floor(discard_num_float))
|
| + kept_weight = len(data_set) - discard_num_float * 2
|
| +
|
| + data_set = data_set[discard_num_int:len(data_set)-discard_num_int]
|
| +
|
| + weight_left = 1.0 - (discard_num_float - discard_num_int)
|
| +
|
| + if weight_left < 1:
|
| + # If the % to discard leaves a fractional portion, need to weight those
|
| + # values.
|
| + unweighted_vals = data_set[1:len(data_set)-1]
|
| + weighted_vals = [data_set[0], data_set[len(data_set)-1]]
|
| + weighted_vals = [w * weight_left for w in weighted_vals]
|
| + data_set = weighted_vals + unweighted_vals
|
| + else:
|
| + kept_weight = len(data_set)
|
| +
|
| + truncated_mean = reduce(lambda x, y: float(x) + float(y),
|
| + data_set) / kept_weight
|
| +
|
| + return truncated_mean
|
| +
|
| +
|
| +def Mean(values):
|
| + """Calculates the arithmetic mean of a list of values."""
|
| + return TruncatedMean(values, 0.0)
|
| +
|
| +
|
| +def StandardDeviation(values):
|
| + """Calculates the sample standard deviation of the given list of values."""
|
| + if len(values) == 1:
|
| + return 0.0
|
| +
|
| + mean = Mean(values)
|
| + differences_from_mean = [float(x) - mean for x in values]
|
| + squared_differences = [float(x * x) for x in differences_from_mean]
|
| + variance = sum(squared_differences) / (len(values) - 1)
|
| + std_dev = math.sqrt(variance)
|
| +
|
| + return std_dev
|
| +
|
| +
|
| +def RelativeChange(before, after):
|
| + """Returns the relative change of before and after, relative to before.
|
| +
|
| + There are several different ways to define relative difference between
|
| + two numbers; sometimes it is defined as relative to the smaller number,
|
| + or to the mean of the two numbers. This version returns the difference
|
| + relative to the first of the two numbers.
|
| +
|
| + Args:
|
| + before: A number representing an earlier value.
|
| + after: Another number, representing a later value.
|
| +
|
| + Returns:
|
| + A non-negative floating point number; 0.1 represents a 10% change.
|
| + """
|
| + if before == after:
|
| + return 0.0
|
| + if before == 0:
|
| + return float('nan')
|
| + difference = after - before
|
| + return math.fabs(difference / before)
|
| +
|
| +
|
| +def PooledStandardError(work_sets):
|
| + """Calculates the pooled sample standard error for a set of samples.
|
| +
|
| + Args:
|
| + work_sets: A collection of collections of numbers.
|
| +
|
| + Returns:
|
| + Pooled sample standard error.
|
| + """
|
| + numerator = 0.0
|
| + denominator1 = 0.0
|
| + denominator2 = 0.0
|
| +
|
| + for current_set in work_sets:
|
| + std_dev = StandardDeviation(current_set)
|
| + numerator += (len(current_set) - 1) * std_dev ** 2
|
| + denominator1 += len(current_set) - 1
|
| + if len(current_set) > 0:
|
| + denominator2 += 1.0 / len(current_set)
|
| +
|
| + if denominator1 == 0:
|
| + return 0.0
|
| +
|
| + return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
|
| +
|
| +
|
| +# Redefining built-in 'StandardError'
|
| +# pylint: disable=W0622
|
| +def StandardError(values):
|
| + """Calculates the standard error of a list of values."""
|
| + if len(values) <= 1:
|
| + return 0.0
|
| + std_dev = StandardDeviation(values)
|
| + return std_dev / math.sqrt(len(values))
|
|
|