tools/auto_bisect/math_utils.py - Issue 417013003: Move statistical functions in bisect script to their own module.

Side by Side Diff: tools/auto_bisect/math_utils.py

Issue 417013003: Move statistical functions in bisect script to their own module. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Respond to nit Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright 2014 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """General statistical or mathematical functions."""

	6

	7 import math

	8

	9

	10 def TruncatedMean(data_set, truncate_percent):

	11 """Calculates the truncated mean of a set of values.

	12

	13 Note that this isn't just the mean of the set of values with the highest

	14 and lowest values discarded; the non-discarded values are also weighted

	15 differently depending how many values are discarded.

	16

	17 Args:

	18 data_set: Non-empty list of values.

	19 truncate_percent: The % from the upper and lower portions of the data set

	20 to discard, expressed as a value in [0, 1].

	21

	22 Returns:

	23 The truncated mean as a float.

	24

	25 Raises:

	26 TypeError: The data set was empty after discarding values.

	27 """

	28 if len(data_set) > 2:

	29 data_set = sorted(data_set)

	30

	31 discard_num_float = len(data_set) * truncate_percent

	32 discard_num_int = int(math.floor(discard_num_float))

	33 kept_weight = len(data_set) - discard_num_float * 2

	34

	35 data_set = data_set[discard_num_int:len(data_set)-discard_num_int]

	36

	37 weight_left = 1.0 - (discard_num_float - discard_num_int)

	38

	39 if weight_left < 1:

	40 # If the % to discard leaves a fractional portion, need to weight those

	41 # values.

	42 unweighted_vals = data_set[1:len(data_set)-1]

	43 weighted_vals = [data_set[0], data_set[len(data_set)-1]]

	44 weighted_vals = [w * weight_left for w in weighted_vals]

	45 data_set = weighted_vals + unweighted_vals

	46 else:

	47 kept_weight = len(data_set)

	48

	49 truncated_mean = reduce(lambda x, y: float(x) + float(y),

	50 data_set) / kept_weight

	51

	52 return truncated_mean

	53

	54

	55 def Mean(values):

	56 """Calculates the arithmetic mean of a list of values."""

	57 return TruncatedMean(values, 0.0)

	58

	59

	60 def StandardDeviation(values):

	61 """Calculates the sample standard deviation of the given list of values."""

	62 if len(values) == 1:

	63 return 0.0

	64

	65 mean = Mean(values)

	66 differences_from_mean = [float(x) - mean for x in values]

	67 squared_differences = [float(x * x) for x in differences_from_mean]

	68 variance = sum(squared_differences) / (len(values) - 1)

	69 std_dev = math.sqrt(variance)

	70

	71 return std_dev

	72

	73

	74 def RelativeChange(before, after):

	75 """Returns the relative change of before and after, relative to before.

	76

	77 There are several different ways to define relative difference between

	78 two numbers; sometimes it is defined as relative to the smaller number,

	79 or to the mean of the two numbers. This version returns the difference

	80 relative to the first of the two numbers.

	81

	82 Args:

	83 before: A number representing an earlier value.

	84 after: Another number, representing a later value.

	85

	86 Returns:

	87 A non-negative floating point number; 0.1 represents a 10% change.

	88 """

	89 if before == after:

	90 return 0.0

	91 if before == 0:

	92 return float('nan')

	93 difference = after - before

	94 return math.fabs(difference / before)

	95

	96

	97 def PooledStandardError(work_sets):

	98 """Calculates the pooled sample standard error for a set of samples.

	99

	100 Args:

	101 work_sets: A collection of collections of numbers.

	102

	103 Returns:

	104 Pooled sample standard error.

	105 """

	106 numerator = 0.0

	107 denominator1 = 0.0

	108 denominator2 = 0.0

	109

	110 for current_set in work_sets:

	111 std_dev = StandardDeviation(current_set)

	112 numerator += (len(current_set) - 1) * std_dev ** 2

	113 denominator1 += len(current_set) - 1

	114 if len(current_set) > 0:

	115 denominator2 += 1.0 / len(current_set)

	116

	117 if denominator1 == 0:

	118 return 0.0

	119

	120 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)

	121

	122

	123 # Redefining built-in 'StandardError'

	124 # pylint: disable=W0622

	125 def StandardError(values):

	126 """Calculates the standard error of a list of values."""

	127 if len(values) <= 1:

	128 return 0.0

	129 std_dev = StandardDeviation(values)

	130 return std_dev / math.sqrt(len(values))

OLD	NEW

« no previous file with comments | « no previous file | tools/auto_bisect/math_utils_test.py » ('j') | no next file with comments »