OLD | NEW |
| (Empty) |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """General statistical or mathematical functions.""" | |
6 | |
7 import math | |
8 | |
9 from recipe_engine import recipe_api | |
10 | |
11 | |
12 class MathUtilsApi(recipe_api.RecipeApi): | |
13 | |
14 @staticmethod | |
15 def truncated_mean(data_set, truncate_fraction): | |
16 """Calculates the truncated mean of a set of values. | |
17 | |
18 Note that this isn't just the mean of the set of values with the highest | |
19 and lowest values discarded; the non-discarded values are also weighted | |
20 differently depending how many values are discarded. | |
21 | |
22 Args: | |
23 data_set: Non-empty list of values. | |
24 truncate_fraction: How much of the upper and lower portions of the data | |
25 set to discard, expressed as a value in [0, 0.5). | |
26 | |
27 Returns: | |
28 The truncated mean as a float. | |
29 | |
30 Raises: | |
31 TypeError: The data set was empty after discarding values. | |
32 """ | |
33 if truncate_fraction >= 0.5: | |
34 raise ValueError('Trying to truncate %d percent of the list.' % | |
35 (200 * truncate_fraction)) | |
36 if len(data_set) > 2: | |
37 data_set = sorted(data_set) | |
38 | |
39 discard_num_float = len(data_set) * truncate_fraction | |
40 discard_num_int = int(math.floor(discard_num_float)) | |
41 kept_weight = len(data_set) - discard_num_float * 2 | |
42 | |
43 data_set = data_set[discard_num_int:len(data_set)-discard_num_int] | |
44 | |
45 weight_left = 1.0 - (discard_num_float - discard_num_int) | |
46 | |
47 if weight_left < 1: | |
48 # If the % to discard leaves a fractional portion, need to weight those | |
49 # values. | |
50 unweighted_vals = data_set[1:len(data_set)-1] | |
51 weighted_vals = [data_set[0], data_set[len(data_set)-1]] | |
52 weighted_vals = [w * weight_left for w in weighted_vals] | |
53 data_set = weighted_vals + unweighted_vals | |
54 else: | |
55 kept_weight = len(data_set) | |
56 | |
57 _truncated_mean = reduce(lambda x, y: float(x) + float(y), | |
58 data_set) / kept_weight | |
59 return _truncated_mean | |
60 | |
61 @staticmethod | |
62 def mean(values): | |
63 """Calculates the arithmetic mean of a list of values.""" | |
64 return MathUtilsApi.truncated_mean(values, 0.0) | |
65 | |
66 @staticmethod | |
67 def variance(values): | |
68 """Calculates the sample variance.""" | |
69 if len(values) == 1: | |
70 return 0.0 | |
71 mean = MathUtilsApi.mean(values) | |
72 differences_from_mean = [float(x) - mean for x in values] | |
73 squared_differences = [float(x * x) for x in differences_from_mean] | |
74 _variance = sum(squared_differences) / (len(values) - 1) | |
75 return _variance | |
76 | |
77 @staticmethod | |
78 def standard_deviation(values): | |
79 """Calculates the sample standard deviation of the given list of values.""" | |
80 return math.sqrt(MathUtilsApi.variance(values)) | |
81 | |
82 @staticmethod | |
83 def relative_change(before, after): | |
84 """Returns the relative change of before and after, relative to before. | |
85 | |
86 There are several different ways to define relative difference between | |
87 two numbers; sometimes it is defined as relative to the smaller number, | |
88 or to the mean of the two numbers. This version returns the difference | |
89 relative to the first of the two numbers. | |
90 | |
91 Args: | |
92 before: A number representing an earlier value. | |
93 after: Another number, representing a later value. | |
94 | |
95 Returns: | |
96 A non-negative floating point number; 0.1 represents a 10% change. | |
97 """ | |
98 if before == after: | |
99 return 0.0 | |
100 if before == 0: | |
101 return float('nan') | |
102 difference = after - before | |
103 return math.fabs(difference / before) | |
104 | |
105 @staticmethod | |
106 def pooled_standard_error(work_sets): | |
107 """Calculates the pooled sample standard error for a set of samples. | |
108 | |
109 Args: | |
110 work_sets: A collection of collections of numbers. | |
111 | |
112 Returns: | |
113 Pooled sample standard error. | |
114 """ | |
115 numerator = 0.0 | |
116 denominator1 = 0.0 | |
117 denominator2 = 0.0 | |
118 | |
119 for current_set in work_sets: | |
120 std_dev = MathUtilsApi.standard_deviation(current_set) | |
121 numerator += (len(current_set) - 1) * std_dev ** 2 | |
122 denominator1 += len(current_set) - 1 | |
123 if len(current_set) > 0: | |
124 denominator2 += 1.0 / len(current_set) | |
125 | |
126 if denominator1 == 0: | |
127 return 0.0 | |
128 | |
129 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2) | |
130 | |
131 @staticmethod | |
132 def standard_error(values): | |
133 """Calculates the standard error of a list of values.""" | |
134 if len(values) <= 1: | |
135 return 0.0 | |
136 std_dev = MathUtilsApi.standard_deviation(values) | |
137 return std_dev / math.sqrt(len(values)) | |
138 | |
139 #Copied this from BisectResults | |
140 @staticmethod | |
141 def confidence_score(sample1, sample2, | |
142 accept_single_bad_or_good=False): | |
143 """Calculates a confidence score. | |
144 | |
145 This score is a percentage which represents our degree of confidence in the | |
146 proposition that the good results and bad results are distinct groups, and | |
147 their differences aren't due to chance alone. | |
148 | |
149 | |
150 Args: | |
151 sample1: A flat list of "good" result numbers. | |
152 sample2: A flat list of "bad" result numbers. | |
153 accept_single_bad_or_good: If True, computes confidence even if there is | |
154 just one bad or good revision, otherwise single good or bad revision | |
155 always returns 0.0 confidence. This flag will probably get away when | |
156 we will implement expanding the bisect range by one more revision for | |
157 such case. | |
158 | |
159 Returns: | |
160 A number in the range [0, 100]. | |
161 """ | |
162 # If there's only one item in either list, this means only one revision was | |
163 # classified good or bad; this isn't good enough evidence to make a | |
164 # decision. If an empty list was passed, that also implies zero confidence. | |
165 if not accept_single_bad_or_good: | |
166 if len(sample1) <= 1 or len(sample2) <= 1: | |
167 return 0.0 | |
168 | |
169 # If there were only empty lists in either of the lists (this is unexpected | |
170 # and normally shouldn't happen), then we also want to return 0. | |
171 if not sample1 or not sample2: | |
172 return 0.0 | |
173 | |
174 # The p-value is approximately the probability of obtaining the given set | |
175 # of good and bad values just by chance. | |
176 _, _, p_value = MathUtilsApi.welchs_t_test(sample1, sample2) | |
177 return 100.0 * (1.0 - p_value) | |
178 | |
179 @staticmethod | |
180 def welchs_t_test(sample1, sample2): | |
181 """Performs Welch's t-test on the two samples. | |
182 | |
183 Welch's t-test is an adaptation of Student's t-test which is used when the | |
184 two samples may have unequal variances. It is also an independent two-sample | |
185 t-test. | |
186 | |
187 Args: | |
188 sample1: A collection of numbers. | |
189 sample2: Another collection of numbers. | |
190 | |
191 Returns: | |
192 A 3-tuple (t-statistic, degrees of freedom, p-value). | |
193 """ | |
194 mean1 = MathUtilsApi.mean(sample1) | |
195 mean2 = MathUtilsApi.mean(sample2) | |
196 v1 = MathUtilsApi.variance(sample1) | |
197 v2 = MathUtilsApi.variance(sample2) | |
198 n1 = len(sample1) | |
199 n2 = len(sample2) | |
200 t = MathUtilsApi._t_value(mean1, mean2, v1, v2, n1, n2) | |
201 df = MathUtilsApi._degrees_of_freedom(v1, v2, n1, n2) | |
202 p = MathUtilsApi._lookup_p_value(t, df) | |
203 return t, df, p | |
204 | |
205 @staticmethod | |
206 def _t_value(mean1, mean2, v1, v2, n1, n2): | |
207 """Calculates a t-statistic value using the formula for Welch's t-test. | |
208 | |
209 The t value can be thought of as a signal-to-noise ratio; a higher t-value | |
210 tells you that the groups are more different. | |
211 | |
212 Args: | |
213 mean1: Mean of sample 1. | |
214 mean2: Mean of sample 2. | |
215 v1: Variance of sample 1. | |
216 v2: Variance of sample 2. | |
217 n1: Sample size of sample 1. | |
218 n2: Sample size of sample 2. | |
219 | |
220 Returns: | |
221 A t value, which may be negative or positive. | |
222 """ | |
223 # If variance of both segments is zero, return some large t-value. | |
224 if v1 == 0 and v2 == 0: | |
225 return 1000.0 | |
226 return (mean1 - mean2) / (math.sqrt(v1 / n1 + v2 / n2)) | |
227 | |
228 @staticmethod | |
229 def _degrees_of_freedom(v1, v2, n1, n2): | |
230 """Calculates degrees of freedom using the Welch-Satterthwaite formula. | |
231 | |
232 Degrees of freedom is a measure of sample size. For other types of tests, | |
233 degrees of freedom is sometimes N - 1, where N is the sample size. However, | |
234 | |
235 Args: | |
236 v1: Variance of sample 1. | |
237 v2: Variance of sample 2. | |
238 n1: Size of sample 2. | |
239 n2: Size of sample 2. | |
240 | |
241 Returns: | |
242 An estimate of degrees of freedom. Must be at least 1.0. | |
243 """ | |
244 # When there's no variance in either sample, return 1. | |
245 if v1 == 0 and v2 == 0: | |
246 return 1 | |
247 # If the sample size is too small, also return the minimum (1). | |
248 if n1 <= 1 or n2 <= 2: | |
249 return 1 | |
250 df = (((v1 / n1 + v2 / n2) ** 2) / | |
251 ((v1 ** 2) / ((n1 ** 2) * (n1 - 1)) + | |
252 (v2 ** 2) / ((n2 ** 2) * (n2 - 1)))) | |
253 return max(1, df) | |
254 | |
255 # Below is a hard-coded table for looking up p-values. | |
256 # | |
257 # Normally, p-values are calculated based on the t-distribution formula. | |
258 # Looking up pre-calculated values is a less accurate but less complicated | |
259 # alternative. | |
260 # | |
261 # Reference: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf | |
262 | |
263 # A list of p-values for a two-tailed test. The entries correspond to to | |
264 # entries in the rows of the table below. | |
265 TWO_TAIL = [1, 0.20, 0.10, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001] | |
266 | |
267 # A map of degrees of freedom to lists of t-values. The index of the t-value | |
268 # can be used to look up the corresponding p-value. | |
269 TABLE = { | |
270 1: [0, 3.078, 6.314, 12.706, 31.820, 63.657, 127.321, 318.309, 636.619], | |
271 2: [0, 1.886, 2.920, 4.303, 6.965, 9.925, 14.089, 22.327, 31.599], | |
272 3: [0, 1.638, 2.353, 3.182, 4.541, 5.841, 7.453, 10.215, 12.924], | |
273 4: [0, 1.533, 2.132, 2.776, 3.747, 4.604, 5.598, 7.173, 8.610], | |
274 5: [0, 1.476, 2.015, 2.571, 3.365, 4.032, 4.773, 5.893, 6.869], | |
275 6: [0, 1.440, 1.943, 2.447, 3.143, 3.707, 4.317, 5.208, 5.959], | |
276 7: [0, 1.415, 1.895, 2.365, 2.998, 3.499, 4.029, 4.785, 5.408], | |
277 8: [0, 1.397, 1.860, 2.306, 2.897, 3.355, 3.833, 4.501, 5.041], | |
278 9: [0, 1.383, 1.833, 2.262, 2.821, 3.250, 3.690, 4.297, 4.781], | |
279 10: [0, 1.372, 1.812, 2.228, 2.764, 3.169, 3.581, 4.144, 4.587], | |
280 11: [0, 1.363, 1.796, 2.201, 2.718, 3.106, 3.497, 4.025, 4.437], | |
281 12: [0, 1.356, 1.782, 2.179, 2.681, 3.055, 3.428, 3.930, 4.318], | |
282 13: [0, 1.350, 1.771, 2.160, 2.650, 3.012, 3.372, 3.852, 4.221], | |
283 14: [0, 1.345, 1.761, 2.145, 2.625, 2.977, 3.326, 3.787, 4.140], | |
284 15: [0, 1.341, 1.753, 2.131, 2.602, 2.947, 3.286, 3.733, 4.073], | |
285 16: [0, 1.337, 1.746, 2.120, 2.584, 2.921, 3.252, 3.686, 4.015], | |
286 17: [0, 1.333, 1.740, 2.110, 2.567, 2.898, 3.222, 3.646, 3.965], | |
287 18: [0, 1.330, 1.734, 2.101, 2.552, 2.878, 3.197, 3.610, 3.922], | |
288 19: [0, 1.328, 1.729, 2.093, 2.539, 2.861, 3.174, 3.579, 3.883], | |
289 20: [0, 1.325, 1.725, 2.086, 2.528, 2.845, 3.153, 3.552, 3.850], | |
290 21: [0, 1.323, 1.721, 2.080, 2.518, 2.831, 3.135, 3.527, 3.819], | |
291 22: [0, 1.321, 1.717, 2.074, 2.508, 2.819, 3.119, 3.505, 3.792], | |
292 23: [0, 1.319, 1.714, 2.069, 2.500, 2.807, 3.104, 3.485, 3.768], | |
293 24: [0, 1.318, 1.711, 2.064, 2.492, 2.797, 3.090, 3.467, 3.745], | |
294 25: [0, 1.316, 1.708, 2.060, 2.485, 2.787, 3.078, 3.450, 3.725], | |
295 26: [0, 1.315, 1.706, 2.056, 2.479, 2.779, 3.067, 3.435, 3.707], | |
296 27: [0, 1.314, 1.703, 2.052, 2.473, 2.771, 3.057, 3.421, 3.690], | |
297 28: [0, 1.313, 1.701, 2.048, 2.467, 2.763, 3.047, 3.408, 3.674], | |
298 29: [0, 1.311, 1.699, 2.045, 2.462, 2.756, 3.038, 3.396, 3.659], | |
299 30: [0, 1.310, 1.697, 2.042, 2.457, 2.750, 3.030, 3.385, 3.646], | |
300 31: [0, 1.309, 1.695, 2.040, 2.453, 2.744, 3.022, 3.375, 3.633], | |
301 32: [0, 1.309, 1.694, 2.037, 2.449, 2.738, 3.015, 3.365, 3.622], | |
302 33: [0, 1.308, 1.692, 2.035, 2.445, 2.733, 3.008, 3.356, 3.611], | |
303 34: [0, 1.307, 1.691, 2.032, 2.441, 2.728, 3.002, 3.348, 3.601], | |
304 35: [0, 1.306, 1.690, 2.030, 2.438, 2.724, 2.996, 3.340, 3.591], | |
305 36: [0, 1.306, 1.688, 2.028, 2.434, 2.719, 2.991, 3.333, 3.582], | |
306 37: [0, 1.305, 1.687, 2.026, 2.431, 2.715, 2.985, 3.326, 3.574], | |
307 38: [0, 1.304, 1.686, 2.024, 2.429, 2.712, 2.980, 3.319, 3.566], | |
308 39: [0, 1.304, 1.685, 2.023, 2.426, 2.708, 2.976, 3.313, 3.558], | |
309 40: [0, 1.303, 1.684, 2.021, 2.423, 2.704, 2.971, 3.307, 3.551], | |
310 42: [0, 1.302, 1.682, 2.018, 2.418, 2.698, 2.963, 3.296, 3.538], | |
311 44: [0, 1.301, 1.680, 2.015, 2.414, 2.692, 2.956, 3.286, 3.526], | |
312 46: [0, 1.300, 1.679, 2.013, 2.410, 2.687, 2.949, 3.277, 3.515], | |
313 48: [0, 1.299, 1.677, 2.011, 2.407, 2.682, 2.943, 3.269, 3.505], | |
314 50: [0, 1.299, 1.676, 2.009, 2.403, 2.678, 2.937, 3.261, 3.496], | |
315 60: [0, 1.296, 1.671, 2.000, 2.390, 2.660, 2.915, 3.232, 3.460], | |
316 70: [0, 1.294, 1.667, 1.994, 2.381, 2.648, 2.899, 3.211, 3.435], | |
317 80: [0, 1.292, 1.664, 1.990, 2.374, 2.639, 2.887, 3.195, 3.416], | |
318 90: [0, 1.291, 1.662, 1.987, 2.369, 2.632, 2.878, 3.183, 3.402], | |
319 100: [0, 1.290, 1.660, 1.984, 2.364, 2.626, 2.871, 3.174, 3.391], | |
320 120: [0, 1.289, 1.658, 1.980, 2.358, 2.617, 2.860, 3.160, 3.373], | |
321 150: [0, 1.287, 1.655, 1.976, 2.351, 2.609, 2.849, 3.145, 3.357], | |
322 200: [0, 1.286, 1.652, 1.972, 2.345, 2.601, 2.839, 3.131, 3.340], | |
323 300: [0, 1.284, 1.650, 1.968, 2.339, 2.592, 2.828, 3.118, 3.323], | |
324 500: [0, 1.283, 1.648, 1.965, 2.334, 2.586, 2.820, 3.107, 3.310], | |
325 } | |
326 | |
327 @staticmethod | |
328 def _lookup_p_value(t, df): | |
329 """Looks up a p-value in a t-distribution table. | |
330 | |
331 Args: | |
332 t: A t statistic value; the result of a t-test. | |
333 df: Number of degrees of freedom. | |
334 | |
335 Returns: | |
336 A p-value, which represents the likelihood of obtaining a result at least | |
337 as extreme as the one observed just by chance (the null hypothesis). | |
338 """ | |
339 assert df >= 1, 'Degrees of freedom must be positive' | |
340 | |
341 # We ignore the negative sign on the t-value because our null hypothesis | |
342 # is that the two samples are the same; our alternative hypothesis is that | |
343 # the second sample is lesser OR greater than the first. | |
344 t = abs(t) | |
345 | |
346 def greatest_smaller(nums, target): | |
347 """Returns the largest number that is <= the target number.""" | |
348 lesser_equal = [n for n in nums if n <= target] | |
349 assert lesser_equal, 'No number in number list <= target.' | |
350 return max(lesser_equal) | |
351 | |
352 df_key = greatest_smaller(MathUtilsApi.TABLE.keys(), df) | |
353 t_table_row = MathUtilsApi.TABLE[df_key] | |
354 approximate_t_value = greatest_smaller(t_table_row, t) | |
355 t_value_index = t_table_row.index(approximate_t_value) | |
356 | |
357 return MathUtilsApi.TWO_TAIL[t_value_index] | |
OLD | NEW |