OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """General statistical or mathematical functions.""" | |
6 | |
7 import math | |
8 | |
9 | |
def TruncatedMean(data_set, truncate_percent):
    """Calculates the truncated mean of a set of values.

    Note that this isn't just the mean of the set of values with the highest
    and lowest values discarded; when the number of values to discard is
    fractional, the outermost surviving values are down-weighted by the
    fractional part that was trimmed from them.

    Lists with two or fewer values are never truncated; their plain
    arithmetic mean is returned and truncate_percent is ignored.

    Args:
      data_set: Non-empty list of values.
      truncate_percent: The % from the upper and lower portions of the data
          set to discard, expressed as a value in [0, 1].

    Returns:
      The truncated mean as a float.

    Raises:
      TypeError: The data set was empty after discarding values.
    """
    if len(data_set) > 2:
        data_set = sorted(data_set)

        discard_num_float = len(data_set) * truncate_percent
        discard_num_int = int(math.floor(discard_num_float))
        kept_weight = len(data_set) - discard_num_float * 2

        data_set = data_set[discard_num_int:len(data_set) - discard_num_int]

        # Share of each outermost surviving value that was not trimmed away.
        weight_left = 1.0 - (discard_num_float - discard_num_int)

        if data_set and kept_weight > 0 and weight_left < 1:
            if len(data_set) == 1:
                # The lone survivor is trimmed from both sides, but it is the
                # only contributor, so the weighted mean is the value itself.
                # Treating it as both the left and the right edge would count
                # it twice.
                return float(data_set[0])
            # If the % to discard leaves a fractional portion, need to weight
            # the two edge values.
            weighted_vals = [data_set[0] * weight_left,
                             data_set[-1] * weight_left]
            data_set = weighted_vals + data_set[1:-1]
    else:
        kept_weight = len(data_set)

    if not data_set or kept_weight <= 0:
        # Raise the documented error consistently instead of leaking an
        # IndexError or ZeroDivisionError for some inputs.
        raise TypeError('The data set was empty after discarding values.')

    # sum() instead of the Python 2-only builtin reduce(); converting every
    # term also guarantees a float result for single-element input.
    return sum(float(value) for value in data_set) / kept_weight
53 | |
54 | |
def Mean(values):
    """Returns the arithmetic mean of a list of values.

    Implemented as a truncated mean that discards nothing.
    """
    no_truncation = 0.0
    return TruncatedMean(values, no_truncation)
58 | |
59 | |
def StandardDeviation(values):
    """Returns the sample (n - 1 denominator) standard deviation of values.

    A list holding exactly one value yields 0.0.
    """
    count = len(values)
    if count == 1:
        return 0.0

    center = Mean(values)
    deviations = (float(value) - center for value in values)
    sum_of_squares = sum(float(delta * delta) for delta in deviations)
    return math.sqrt(sum_of_squares / (count - 1))
72 | |
73 | |
def RelativeChange(before, after):
    """Returns the relative change of before and after, relative to before.

    There are several different ways to define relative difference between
    two numbers; sometimes it is defined as relative to the smaller number,
    or to the mean of the two numbers. This version returns the difference
    relative to the first of the two numbers.

    Args:
      before: A number representing an earlier value.
      after: Another number, representing a later value.

    Returns:
      A non-negative floating point number; 0.1 represents a 10% change.
      If before is zero and after is not, the relative change is undefined
      and NaN is returned.
    """
    if before == after:
        return 0.0
    if before == 0:
        return float('nan')
    difference = after - before
    # Convert before dividing so that integer inputs are not floor-divided
    # under Python 2 (e.g. (3 - 4) / 4 would otherwise become -1).
    return math.fabs(float(difference) / before)
95 | |
96 | |
def PooledStandardError(work_sets):
    """Calculates the pooled sample standard error for several sets of values.

    The pooled variance is the sum over all sets of (n_i - 1) * s_i ** 2
    divided by the sum of (n_i - 1), where n_i is the size of a set and s_i
    its sample standard deviation. The result is the square root of that
    pooled variance multiplied by the square root of the sum of 1 / n_i.

    Args:
      work_sets: An iterable of lists of numbers. Empty lists are skipped.

    Returns:
      The pooled standard error as a float, or 0.0 if no set has more than
      one value.
    """
    numerator = 0.0
    denominator1 = 0.0
    denominator2 = 0.0

    for current_set in work_sets:
        set_size = len(current_set)
        if not set_size:
            # An empty set carries no information, and evaluating
            # 1.0 / len(current_set) for it would divide by zero.
            continue
        std_dev = StandardDeviation(current_set)
        numerator += (set_size - 1) * std_dev ** 2
        denominator1 += set_size - 1
        denominator2 += 1.0 / set_size

    if denominator1:
        return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
    return 0.0
111 | |
112 | |
# Redefining built-in 'StandardError'
# pylint: disable=W0622
def StandardError(values):
    """Returns the standard error of a list of values.

    This is the sample standard deviation divided by the square root of the
    number of values; lists with fewer than two values yield 0.0.
    """
    sample_size = len(values)
    if sample_size > 1:
        return StandardDeviation(values) / math.sqrt(sample_size)
    return 0.0
OLD | NEW |