tools/auto_bisect/math_utils.py - Issue 417013003: Move statistical functions in bisect script to their own module.

Unified Diff: tools/auto_bisect/math_utils.py

Issue 417013003: Move statistical functions in bisect script to their own module. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebased and fixed test failures Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/auto_bisect/math_utils.py

diff --git a/tools/auto_bisect/math_utils.py b/tools/auto_bisect/math_utils.py

new file mode 100644

index 0000000000000000000000000000000000000000..c81bca64bd53f620c8393231deec3f4f01899458

--- /dev/null

+++ b/tools/auto_bisect/math_utils.py

@@ -0,0 +1,120 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""General statistical or mathematical functions."""

+import math

def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded. When the number of
  values to discard from each end is fractional, the outermost surviving
  value on each side only counts with the leftover fractional weight.

  Data sets with two or fewer values are never truncated; their plain mean
  is returned.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: The % from the upper and lower portions of the data set
        to discard, expressed as a value in [0, 1].

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty after discarding values.
  """
  if len(data_set) > 2:
    data_set = sorted(data_set)

    discard_num_float = len(data_set) * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    kept_weight = len(data_set) - discard_num_float * 2

    data_set = data_set[discard_num_int:len(data_set) - discard_num_int]
    if not data_set:
      raise TypeError('The data set was empty after discarding values.')

    if len(data_set) == 1:
      # A single surviving value is both the lowest and the highest kept
      # value. Weighting it once per side would count it twice, and the
      # remaining weight may even be zero, so its mean is simply itself.
      return float(data_set[0])

    weight_left = 1.0 - (discard_num_float - discard_num_int)
    if weight_left < 1:
      # If the % to discard leaves a fractional portion, need to weight those
      # values.
      total = (float(data_set[0]) * weight_left +
               float(data_set[-1]) * weight_left)
      total += sum(float(value) for value in data_set[1:-1])
    else:
      total = sum(float(value) for value in data_set)
  else:
    if not data_set:
      raise TypeError('The data set was empty after discarding values.')
    kept_weight = len(data_set)
    # Summing floats keeps the result a float (and the division a true
    # division) even for a single integer value.
    total = sum(float(value) for value in data_set)

  return total / kept_weight

def Mean(values):
  """Returns the arithmetic mean of a list of values.

  This is the truncated mean with nothing discarded from either end.
  """
  no_truncation = 0.0
  return TruncatedMean(values, truncate_percent=no_truncation)

def StandardDeviation(values):
  """Returns the sample standard deviation of the given list of values.

  A list holding exactly one value has no spread, so 0.0 is returned for it.
  Otherwise the sum of squared deviations from the mean is divided by
  (number of values - 1) before taking the square root.
  """
  count = len(values)
  if count == 1:
    return 0.0

  center = Mean(values)
  deviations = [float(value) - center for value in values]
  sum_of_squares = sum(float(d * d) for d in deviations)
  return math.sqrt(sum_of_squares / (count - 1))

def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    If before is zero and after is not, the change is undefined and NaN is
    returned.
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Convert to float before dividing so that two integer arguments do not
  # trigger truncating integer division.
  difference = float(after - before)
  return math.fabs(difference / before)

def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a list of samples.

  The pooled variance is the sum over all sets of
  (set size - 1) * (sample standard deviation squared), divided by the sum
  of (set size - 1). The result is the square root of that pooled variance
  multiplied by the square root of the sum of 1 / (set size).

  Args:
    work_sets: A list of lists of numbers, one list per sample.

  Returns:
    The pooled standard error as a float, or 0.0 if there are not enough
    values to estimate a variance (for example when every set holds at most
    one value).
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    set_size = len(current_set)
    if not set_size:
      # An empty set contributes nothing and would otherwise cause a
      # division by zero when computing 1 / (set size).
      continue
    std_dev = StandardDeviation(current_set)
    numerator += (set_size - 1) * std_dev ** 2
    denominator1 += set_size - 1
    denominator2 += 1.0 / set_size

  if denominator1:
    return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
  return 0.0

# Redefining built-in 'StandardError'
# pylint: disable=W0622
def StandardError(values):
  """Returns the standard error of a list of values.

  This is the sample standard deviation divided by the square root of the
  number of values. Lists with fewer than two values yield 0.0.
  """
  sample_count = len(values)
  if sample_count > 1:
    return StandardDeviation(values) / math.sqrt(sample_count)
  return 0.0

« no previous file with comments | « no previous file | tools/auto_bisect/math_utils_test.py » ('j') | no next file with comments »