| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """General statistical or mathematical functions.""" | 5 """General statistical or mathematical functions.""" |
| 6 | 6 |
| 7 import math | 7 import math |
| 8 | 8 |
| 9 from slave import recipe_api | 9 from slave import recipe_api |
| 10 | 10 |
| 11 |
| 11 class MathUtilsApi(recipe_api.RecipeApi): | 12 class MathUtilsApi(recipe_api.RecipeApi): |
| 12 | 13 |
| 13 @staticmethod | 14 @staticmethod |
| 14 def truncated_mean(data_set, truncate_fraction): | 15 def truncated_mean(data_set, truncate_fraction): |
| 15 """Calculates the truncated mean of a set of values. | 16 """Calculates the truncated mean of a set of values. |
| 16 | 17 |
| 17 Note that this isn't just the mean of the set of values with the highest | 18 Note that this isn't just the mean of the set of values with the highest |
| 18 and lowest values discarded; the non-discarded values are also weighted | 19 and lowest values discarded; the non-discarded values are also weighted |
| 19 differently depending how many values are discarded. | 20 differently depending how many values are discarded. |
| 20 | 21 |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 131 def standard_error(values): | 132 def standard_error(values): |
| 132 """Calculates the standard error of a list of values.""" | 133 """Calculates the standard error of a list of values.""" |
| 133 if len(values) <= 1: | 134 if len(values) <= 1: |
| 134 return 0.0 | 135 return 0.0 |
| 135 std_dev = MathUtilsApi.standard_deviation(values) | 136 std_dev = MathUtilsApi.standard_deviation(values) |
| 136 return std_dev / math.sqrt(len(values)) | 137 return std_dev / math.sqrt(len(values)) |
| 137 | 138 |
| 138 # Copied this from BisectResults | 139 # Copied this from BisectResults |
| 139 @staticmethod | 140 @staticmethod |
| 140 def confidence_score(sample1, sample2, | 141 def confidence_score(sample1, sample2, |
| 141 accept_single_bad_or_good=False): | 142 accept_single_bad_or_good=False): |
| 142 """Calculates a confidence score. | 143 """Calculates a confidence score. |
| 143 | 144 |
| 144 This score is a percentage which represents our degree of confidence in the | 145 This score is a percentage which represents our degree of confidence in the |
| 145 proposition that the good results and bad results are distinct groups, and | 146 proposition that the good results and bad results are distinct groups, and |
| 146 their differences aren't due to chance alone. | 147 their differences aren't due to chance alone. |
| 147 | 148 |
| 148 | 149 |
| 149 Args: | 150 Args: |
| 150 sample1: A flat list of "good" result numbers. | 151 sample1: A flat list of "good" result numbers. |
| 151 sample2: A flat list of "bad" result numbers. | 152 sample2: A flat list of "bad" result numbers. |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 244 if v1 == 0 and v2 == 0: | 245 if v1 == 0 and v2 == 0: |
| 245 return 1 | 246 return 1 |
| 246 # If the sample size is too small, also return the minimum (1). | 247 # If the sample size is too small, also return the minimum (1). |
| 247 if n1 <= 1 or n2 <= 2: | 248 if n1 <= 1 or n2 <= 2: |
| 248 return 1 | 249 return 1 |
| 249 df = (((v1 / n1 + v2 / n2) ** 2) / | 250 df = (((v1 / n1 + v2 / n2) ** 2) / |
| 250 ((v1 ** 2) / ((n1 ** 2) * (n1 - 1)) + | 251 ((v1 ** 2) / ((n1 ** 2) * (n1 - 1)) + |
| 251 (v2 ** 2) / ((n2 ** 2) * (n2 - 1)))) | 252 (v2 ** 2) / ((n2 ** 2) * (n2 - 1)))) |
| 252 return max(1, df) | 253 return max(1, df) |
| 253 | 254 |
| 254 | |
| 255 # Below is a hard-coded table for looking up p-values. | 255 # Below is a hard-coded table for looking up p-values. |
| 256 # | 256 # |
| 257 # Normally, p-values are calculated based on the t-distribution formula. | 257 # Normally, p-values are calculated based on the t-distribution formula. |
| 258 # Looking up pre-calculated values is a less accurate but less complicated | 258 # Looking up pre-calculated values is a less accurate but less complicated |
| 259 # alternative. | 259 # alternative. |
| 260 # | 260 # |
| 261 # Reference: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf | 261 # Reference: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf |
| 262 | 262 |
| 263 # A list of p-values for a two-tailed test. The entries correspond to | 263 # A list of p-values for a two-tailed test. The entries correspond to |
| 264 # entries in the rows of the table below. | 264 # entries in the rows of the table below. |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 348 lesser_equal = [n for n in nums if n <= target] | 348 lesser_equal = [n for n in nums if n <= target] |
| 349 assert lesser_equal, 'No number in number list <= target.' | 349 assert lesser_equal, 'No number in number list <= target.' |
| 350 return max(lesser_equal) | 350 return max(lesser_equal) |
| 351 | 351 |
| 352 df_key = greatest_smaller(MathUtilsApi.TABLE.keys(), df) | 352 df_key = greatest_smaller(MathUtilsApi.TABLE.keys(), df) |
| 353 t_table_row = MathUtilsApi.TABLE[df_key] | 353 t_table_row = MathUtilsApi.TABLE[df_key] |
| 354 approximate_t_value = greatest_smaller(t_table_row, t) | 354 approximate_t_value = greatest_smaller(t_table_row, t) |
| 355 t_value_index = t_table_row.index(approximate_t_value) | 355 t_value_index = t_table_row.index(approximate_t_value) |
| 356 | 356 |
| 357 return MathUtilsApi.TWO_TAIL[t_value_index] | 357 return MathUtilsApi.TWO_TAIL[t_value_index] |
| OLD | NEW |