OLD | NEW |
(Empty) | |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 """General statistical or mathematical functions.""" |
| 6 |
| 7 import math |
| 8 |
| 9 |
def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded.

  Data sets with one or two values are never truncated; their plain
  arithmetic mean is returned regardless of truncate_percent.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: The % from the upper and lower portions of the data set
        to discard, expressed as a value in [0, 1].

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty, or nothing was left after discarding
        values (e.g. truncate_percent >= 0.5).
  """
  # Defaults used when the set is too small to truncate.
  kept_weight = len(data_set)
  boundary_weight = 1.0

  if len(data_set) > 2:
    data_set = sorted(data_set)

    discard_num_float = len(data_set) * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    # Total weight that survives after trimming both ends.
    kept_weight = len(data_set) - discard_num_float * 2

    # Drop the values that are discarded completely.
    data_set = data_set[discard_num_int:len(data_set) - discard_num_int]

    # The outermost surviving values are only partially kept when the number
    # of values to discard is not a whole number.
    boundary_weight = 1.0 - (discard_num_float - discard_num_int)

  if not data_set or kept_weight <= 0:
    # Report every "nothing left" case with the documented exception instead
    # of leaking an IndexError or ZeroDivisionError.
    raise TypeError('The data set was empty after discarding values.')

  values = [float(v) for v in data_set]

  if boundary_weight < 1:
    if len(values) == 1:
      # A single survivor is trimmed on both sides, so it carries the whole
      # kept weight; counting it as two separate boundary values would
      # double its contribution.
      return values[0]
    # If the % to discard leaves a fractional portion, need to weight the
    # two boundary values.
    values = ([values[0] * boundary_weight, values[-1] * boundary_weight] +
              values[1:-1])

  return sum(values) / kept_weight
| 53 |
| 54 |
def Mean(values):
  """Returns the arithmetic mean of a list of values.

  This is the truncated mean with nothing discarded from either end.
  """
  nothing_discarded = 0.0
  return TruncatedMean(values, nothing_discarded)
| 58 |
| 59 |
def StandardDeviation(values):
  """Returns the sample standard deviation of the given list of values.

  A list holding a single value has a standard deviation of 0.0. Otherwise
  the squared distances from the mean are summed and divided by (n - 1)
  before taking the square root.
  """
  count = len(values)
  if count == 1:
    return 0.0

  center = Mean(values)
  offsets = (float(value) - center for value in values)
  sum_of_squares = sum([offset * offset for offset in offsets])

  return math.sqrt(sum_of_squares / (count - 1))
| 72 |
| 73 |
def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    Returns 0.0 when both values are equal, and NaN when before is zero
    but after is not (the change relative to zero is undefined).
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Convert before dividing so that integer arguments are not floor-divided
  # when '/' is integer division (Python 2 without true division).
  difference = float(after - before)
  return math.fabs(difference / before)
| 95 |
| 96 |
def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  The result is sqrt(sum((n_i - 1) * s_i^2) / sum(n_i - 1)) multiplied by
  sqrt(sum(1 / n_i)), where n_i and s_i are the size and the sample standard
  deviation of each sample. Empty samples are ignored.

  Args:
    work_sets: A collection of collections of numbers.

  Returns:
    Pooled sample standard error, or 0.0 when no sample has more than one
    value (there are no degrees of freedom to estimate from).
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    set_size = len(current_set)
    if set_size == 0:
      # An empty sample has no standard deviation to compute and must not
      # subtract a degree of freedom from the pooled total.
      continue
    std_dev = StandardDeviation(current_set)
    numerator += (set_size - 1) * std_dev ** 2
    denominator1 += set_size - 1
    denominator2 += 1.0 / set_size

  if denominator1 == 0:
    return 0.0

  return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
| 121 |
| 122 |
| 123 # Redefining built-in 'StandardError' |
| 124 # pylint: disable=W0622 |
def StandardError(values):
  """Returns the standard error of a list of values.

  This is the sample standard deviation divided by the square root of the
  number of values; lists with fewer than two values yield 0.0.
  """
  count = len(values)
  if count > 1:
    return StandardDeviation(values) / math.sqrt(count)
  return 0.0
OLD | NEW |