OLD | NEW |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """General statistical or mathematical functions.""" | 5 """General statistical or mathematical functions.""" |
6 | 6 |
7 import math | 7 import math |
8 | 8 |
9 | 9 |
10 def TruncatedMean(data_set, truncate_percent): | 10 def TruncatedMean(data_set, truncate_proportion): |
11 """Calculates the truncated mean of a set of values. | 11 """Calculates the truncated mean of a set of values. |
12 | 12 |
13 Note that this isn't just the mean of the set of values with the highest | 13 Note that this isn't just the mean of the set of values with the highest |
14 and lowest values discarded; the non-discarded values are also weighted | 14 and lowest values discarded; the non-discarded values are also weighted |
15 differently depending on how many values are discarded. | 15 differently depending on how many values are discarded. |
16 | 16 |
| 17 NOTE: If keeping and weighting partial values turns out to provide |
| 18 little benefit, it might be better to use a simplified truncated mean |
| 19 function without weighting. |
| 20 |
17 Args: | 21 Args: |
18 data_set: Non-empty list of values. | 22 data_set: Non-empty list of values. |
19 truncate_percent: How much of the upper and lower portions of the data set | 23 truncate_proportion: How much of the upper and lower portions of the data |
20 to discard, expressed as a value in [0, 1]. | 24 set to discard, expressed as a value in the range [0, 1]. |
| 25 Note: A value of 0.5 or greater is meaningless (nothing would be kept). |
21 | 26 |
22 Returns: | 27 Returns: |
23 The truncated mean as a float. | 28 The truncated mean as a float. |
24 | 29 |
25 Raises: | 30 Raises: |
26 TypeError: The data set was empty after discarding values. | 31 TypeError: The data set was empty after discarding values. |
27 """ | 32 """ |
28 if len(data_set) > 2: | 33 if len(data_set) > 2: |
29 data_set = sorted(data_set) | 34 data_set = sorted(data_set) |
30 | 35 |
31 discard_num_float = len(data_set) * truncate_percent | 36 discard_num_float = len(data_set) * truncate_proportion |
32 discard_num_int = int(math.floor(discard_num_float)) | 37 discard_num_int = int(math.floor(discard_num_float)) |
33 kept_weight = len(data_set) - discard_num_float * 2 | 38 kept_weight = len(data_set) - (discard_num_float * 2) |
34 | 39 |
35 data_set = data_set[discard_num_int:len(data_set)-discard_num_int] | 40 data_set = data_set[discard_num_int:len(data_set)-discard_num_int] |
36 | 41 |
37 weight_left = 1.0 - (discard_num_float - discard_num_int) | 42 weight_left = 1.0 - (discard_num_float - discard_num_int) |
38 | 43 |
39 if weight_left < 1: | 44 if weight_left < 1: |
40 # If the % to discard leaves a fractional portion, need to weight those | 45 # If the % to discard leaves a fractional portion, need to weight those |
41 # values. | 46 # values. |
42 unweighted_vals = data_set[1:len(data_set)-1] | 47 unweighted_vals = data_set[1:len(data_set)-1] |
43 weighted_vals = [data_set[0], data_set[len(data_set)-1]] | 48 weighted_vals = [data_set[0], data_set[len(data_set)-1]] |
44 weighted_vals = [w * weight_left for w in weighted_vals] | 49 weighted_vals = [w * weight_left for w in weighted_vals] |
45 data_set = weighted_vals + unweighted_vals | 50 data_set = weighted_vals + unweighted_vals |
46 else: | 51 else: |
47 kept_weight = len(data_set) | 52 kept_weight = len(data_set) |
48 | 53 |
49 truncated_mean = reduce(lambda x, y: float(x) + float(y), | 54 data_sum = reduce(lambda x, y: float(x) + float(y), data_set) |
50 data_set) / kept_weight | 55 truncated_mean = data_sum / kept_weight |
51 | |
52 return truncated_mean | 56 return truncated_mean |
53 | 57 |
54 | 58 |
55 def Mean(values): | 59 def Mean(values): |
56 """Calculates the arithmetic mean of a list of values.""" | 60 """Calculates the arithmetic mean of a list of values.""" |
57 return TruncatedMean(values, 0.0) | 61 return TruncatedMean(values, 0.0) |
58 | 62 |
59 | 63 |
60 def Variance(values): | 64 def Variance(values): |
61 """Calculates the sample variance.""" | 65 """Calculates the sample variance.""" |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
119 if denominator1 == 0: | 123 if denominator1 == 0: |
120 return 0.0 | 124 return 0.0 |
121 | 125 |
122 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2) | 126 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2) |
123 | 127 |
124 | 128 |
125 # Redefining built-in 'StandardError' | 129 # Redefining built-in 'StandardError' |
126 # pylint: disable=W0622 | 130 # pylint: disable=W0622 |
127 def StandardError(values): | 131 def StandardError(values): |
128 """Calculates the standard error of a list of values.""" | 132 """Calculates the standard error of a list of values.""" |
| 133 # NOTE: This behavior of returning 0.0 in the case of an empty list is |
| 134 # inconsistent with Variance and StandardDeviation above. |
129 if len(values) <= 1: | 135 if len(values) <= 1: |
130 return 0.0 | 136 return 0.0 |
131 std_dev = StandardDeviation(values) | 137 std_dev = StandardDeviation(values) |
132 return std_dev / math.sqrt(len(values)) | 138 return std_dev / math.sqrt(len(values)) |
| 139 |
OLD | NEW |