| OLD | NEW |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """General statistical or mathematical functions.""" | 5 """General statistical or mathematical functions.""" |
| 6 | 6 |
| 7 import math | 7 import math |
| 8 | 8 |
| 9 | 9 |
| 10 def TruncatedMean(data_set, truncate_percent): | 10 def TruncatedMean(data_set, truncate_proportion): |
| 11 """Calculates the truncated mean of a set of values. | 11 """Calculates the truncated mean of a set of values. |
| 12 | 12 |
| 13 Note that this isn't just the mean of the set of values with the highest | 13 Note that this isn't just the mean of the set of values with the highest |
| 14 and lowest values discarded; the non-discarded values are also weighted | 14 and lowest values discarded; the non-discarded values are also weighted |
| 15 differently depending on how many values are discarded. | 15 differently depending on how many values are discarded. |
| 16 | 16 |
| 17 NOTE: If keeping and weighting partial values provides little benefit, |
| 18 it might be better to use a simplified truncated mean function without |
| 19 weighting. |
| 20 |
| 17 Args: | 21 Args: |
| 18 data_set: Non-empty list of values. | 22 data_set: Non-empty list of values. |
| 19 truncate_percent: How much of the upper and lower portions of the data set | 23 truncate_proportion: How much of the upper and lower portions of the data |
| 20 to discard, expressed as a value in [0, 1]. | 24 set to discard, expressed as a value in the range [0, 1]. |
| 25 Note: a value of 0.5 or greater would be meaningless, since it would discard the entire data set. |
| 21 | 26 |
| 22 Returns: | 27 Returns: |
| 23 The truncated mean as a float. | 28 The truncated mean as a float. |
| 24 | 29 |
| 25 Raises: | 30 Raises: |
| 26 TypeError: The data set was empty after discarding values. | 31 TypeError: The data set was empty after discarding values. |
| 27 """ | 32 """ |
| 28 if len(data_set) > 2: | 33 if len(data_set) > 2: |
| 29 data_set = sorted(data_set) | 34 data_set = sorted(data_set) |
| 30 | 35 |
| 31 discard_num_float = len(data_set) * truncate_percent | 36 discard_num_float = len(data_set) * truncate_proportion |
| 32 discard_num_int = int(math.floor(discard_num_float)) | 37 discard_num_int = int(math.floor(discard_num_float)) |
| 33 kept_weight = len(data_set) - discard_num_float * 2 | 38 kept_weight = len(data_set) - (discard_num_float * 2) |
| 34 | 39 |
| 35 data_set = data_set[discard_num_int:len(data_set)-discard_num_int] | 40 data_set = data_set[discard_num_int:len(data_set)-discard_num_int] |
| 36 | 41 |
| 37 weight_left = 1.0 - (discard_num_float - discard_num_int) | 42 weight_left = 1.0 - (discard_num_float - discard_num_int) |
| 38 | 43 |
| 39 if weight_left < 1: | 44 if weight_left < 1: |
| 40 # If the % to discard leaves a fractional portion, need to weight those | 45 # If the % to discard leaves a fractional portion, need to weight those |
| 41 # values. | 46 # values. |
| 42 unweighted_vals = data_set[1:len(data_set)-1] | 47 unweighted_vals = data_set[1:len(data_set)-1] |
| 43 weighted_vals = [data_set[0], data_set[len(data_set)-1]] | 48 weighted_vals = [data_set[0], data_set[len(data_set)-1]] |
| 44 weighted_vals = [w * weight_left for w in weighted_vals] | 49 weighted_vals = [w * weight_left for w in weighted_vals] |
| 45 data_set = weighted_vals + unweighted_vals | 50 data_set = weighted_vals + unweighted_vals |
| 46 else: | 51 else: |
| 47 kept_weight = len(data_set) | 52 kept_weight = len(data_set) |
| 48 | 53 |
| 49 truncated_mean = reduce(lambda x, y: float(x) + float(y), | 54 data_sum = reduce(lambda x, y: float(x) + float(y), data_set) |
| 50 data_set) / kept_weight | 55 truncated_mean = data_sum / kept_weight |
| 51 | |
| 52 return truncated_mean | 56 return truncated_mean |
| 53 | 57 |
| 54 | 58 |
| 55 def Mean(values): | 59 def Mean(values): |
| 56 """Calculates the arithmetic mean of a list of values.""" | 60 """Calculates the arithmetic mean of a list of values.""" |
| 57 return TruncatedMean(values, 0.0) | 61 return TruncatedMean(values, 0.0) |
| 58 | 62 |
| 59 | 63 |
| 60 def Variance(values): | 64 def Variance(values): |
| 61 """Calculates the sample variance.""" | 65 """Calculates the sample variance.""" |
| (... 57 matching lines skipped ...) | (... 57 matching lines skipped ...) |
| 119 if denominator1 == 0: | 123 if denominator1 == 0: |
| 120 return 0.0 | 124 return 0.0 |
| 121 | 125 |
| 122 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2) | 126 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2) |
| 123 | 127 |
| 124 | 128 |
| 125 # Redefining built-in 'StandardError' | 129 # Redefining built-in 'StandardError' |
| 126 # pylint: disable=W0622 | 130 # pylint: disable=W0622 |
| 127 def StandardError(values): | 131 def StandardError(values): |
| 128 """Calculates the standard error of a list of values.""" | 132 """Calculates the standard error of a list of values.""" |
| 133 # NOTE: This behavior of returning 0.0 in the case of an empty list is |
| 134 # inconsistent with Variance and StandardDeviation above. |
| 129 if len(values) <= 1: | 135 if len(values) <= 1: |
| 130 return 0.0 | 136 return 0.0 |
| 131 std_dev = StandardDeviation(values) | 137 std_dev = StandardDeviation(values) |
| 132 return std_dev / math.sqrt(len(values)) | 138 return std_dev / math.sqrt(len(values)) |
| 139 |
| OLD | NEW |