OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """General statistical or mathematical functions.""" | 5 """General statistical or mathematical functions.""" |
6 | 6 |
7 import math | 7 import math |
8 | 8 |
9 from slave import recipe_api | 9 from slave import recipe_api |
10 | 10 |
| 11 |
11 class MathUtilsApi(recipe_api.RecipeApi): | 12 class MathUtilsApi(recipe_api.RecipeApi): |
12 | 13 |
13 @staticmethod | 14 @staticmethod |
14 def truncated_mean(data_set, truncate_fraction): | 15 def truncated_mean(data_set, truncate_fraction): |
15 """Calculates the truncated mean of a set of values. | 16 """Calculates the truncated mean of a set of values. |
16 | 17 |
17 Note that this isn't just the mean of the set of values with the highest | 18 Note that this isn't just the mean of the set of values with the highest |
18 and lowest values discarded; the non-discarded values are also weighted | 19 and lowest values discarded; the non-discarded values are also weighted |
19 differently depending on how many values are discarded. | 20 differently depending on how many values are discarded. |
20 | 21 |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
131 def standard_error(values): | 132 def standard_error(values): |
132 """Calculates the standard error of a list of values.""" | 133 """Calculates the standard error of a list of values.""" |
133 if len(values) <= 1: | 134 if len(values) <= 1: |
134 return 0.0 | 135 return 0.0 |
135 std_dev = MathUtilsApi.standard_deviation(values) | 136 std_dev = MathUtilsApi.standard_deviation(values) |
136 return std_dev / math.sqrt(len(values)) | 137 return std_dev / math.sqrt(len(values)) |
137 | 138 |
138 # Copied this from BisectResults | 139 # Copied this from BisectResults |
139 @staticmethod | 140 @staticmethod |
140 def confidence_score(sample1, sample2, | 141 def confidence_score(sample1, sample2, |
141 accept_single_bad_or_good=False): | 142 accept_single_bad_or_good=False): |
142 """Calculates a confidence score. | 143 """Calculates a confidence score. |
143 | 144 |
144 This score is a percentage which represents our degree of confidence in the | 145 This score is a percentage which represents our degree of confidence in the |
145 proposition that the good results and bad results are distinct groups, and | 146 proposition that the good results and bad results are distinct groups, and |
146 their differences aren't due to chance alone. | 147 their differences aren't due to chance alone. |
147 | 148 |
148 | 149 |
149 Args: | 150 Args: |
150 sample1: A flat list of "good" result numbers. | 151 sample1: A flat list of "good" result numbers. |
151 sample2: A flat list of "bad" result numbers. | 152 sample2: A flat list of "bad" result numbers. |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 if v1 == 0 and v2 == 0: | 245 if v1 == 0 and v2 == 0: |
245 return 1 | 246 return 1 |
246 # If the sample size is too small, also return the minimum (1). | 247 # If the sample size is too small, also return the minimum (1). |
247 if n1 <= 1 or n2 <= 2: | 248 if n1 <= 1 or n2 <= 2: |
248 return 1 | 249 return 1 |
249 df = (((v1 / n1 + v2 / n2) ** 2) / | 250 df = (((v1 / n1 + v2 / n2) ** 2) / |
250 ((v1 ** 2) / ((n1 ** 2) * (n1 - 1)) + | 251 ((v1 ** 2) / ((n1 ** 2) * (n1 - 1)) + |
251 (v2 ** 2) / ((n2 ** 2) * (n2 - 1)))) | 252 (v2 ** 2) / ((n2 ** 2) * (n2 - 1)))) |
252 return max(1, df) | 253 return max(1, df) |
253 | 254 |
254 | |
255 # Below is a hard-coded table for looking up p-values. | 255 # Below is a hard-coded table for looking up p-values. |
256 # | 256 # |
257 # Normally, p-values are calculated based on the t-distribution formula. | 257 # Normally, p-values are calculated based on the t-distribution formula. |
258 # Looking up pre-calculated values is a less accurate but less complicated | 258 # Looking up pre-calculated values is a less accurate but less complicated |
259 # alternative. | 259 # alternative. |
260 # | 260 # |
261 # Reference: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf | 261 # Reference: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf |
262 | 262 |
263 # A list of p-values for a two-tailed test. The entries correspond to | 263 # A list of p-values for a two-tailed test. The entries correspond to |
264 # entries in the rows of the table below. | 264 # entries in the rows of the table below. |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
348 lesser_equal = [n for n in nums if n <= target] | 348 lesser_equal = [n for n in nums if n <= target] |
349 assert lesser_equal, 'No number in number list <= target.' | 349 assert lesser_equal, 'No number in number list <= target.' |
350 return max(lesser_equal) | 350 return max(lesser_equal) |
351 | 351 |
352 df_key = greatest_smaller(MathUtilsApi.TABLE.keys(), df) | 352 df_key = greatest_smaller(MathUtilsApi.TABLE.keys(), df) |
353 t_table_row = MathUtilsApi.TABLE[df_key] | 353 t_table_row = MathUtilsApi.TABLE[df_key] |
354 approximate_t_value = greatest_smaller(t_table_row, t) | 354 approximate_t_value = greatest_smaller(t_table_row, t) |
355 t_value_index = t_table_row.index(approximate_t_value) | 355 t_value_index = t_table_row.index(approximate_t_value) |
356 | 356 |
357 return MathUtilsApi.TWO_TAIL[t_value_index] | 357 return MathUtilsApi.TWO_TAIL[t_value_index] |
OLD | NEW |