OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
7 | 7 |
8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 import sys | 48 import sys |
49 import time | 49 import time |
50 import zipfile | 50 import zipfile |
51 | 51 |
52 sys.path.append(os.path.join(os.path.dirname(__file__), 'telemetry')) | 52 sys.path.append(os.path.join(os.path.dirname(__file__), 'telemetry')) |
53 | 53 |
54 from auto_bisect import bisect_utils | 54 from auto_bisect import bisect_utils |
55 from auto_bisect import math_utils | 55 from auto_bisect import math_utils |
56 from auto_bisect import post_perf_builder_job as bisect_builder | 56 from auto_bisect import post_perf_builder_job as bisect_builder |
57 from auto_bisect import source_control as source_control_module | 57 from auto_bisect import source_control as source_control_module |
| 58 from auto_bisect import ttest |
58 from telemetry.util import cloud_storage | 59 from telemetry.util import cloud_storage |
59 | 60 |
60 # The additional repositories that might need to be bisected. | 61 # The additional repositories that might need to be bisected. |
61 # If the repository has any dependent repositories (such as skia/src needs | 62 # If the repository has any dependent repositories (such as skia/src needs |
62 # skia/include and skia/gyp to be updated), specify them in the 'depends' | 63 # skia/include and skia/gyp to be updated), specify them in the 'depends' |
63 # so that they're synced appropriately. | 64 # so that they're synced appropriately. |
64 # Format is: | 65 # Format is: |
65 # src: path to the working directory. | 66 # src: path to the working directory. |
66 # recurse: True if this repository will get bisected. | 67 # recurse: True if this repository will get bisected. |
67 # depends: A list of other repositories that are actually part of the same | 68 # depends: A list of other repositories that are actually part of the same |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
253 def _AddAdditionalDepotInfo(depot_info): | 254 def _AddAdditionalDepotInfo(depot_info): |
254 """Adds additional depot info to the global depot variables.""" | 255 """Adds additional depot info to the global depot variables.""" |
255 global DEPOT_DEPS_NAME | 256 global DEPOT_DEPS_NAME |
256 global DEPOT_NAMES | 257 global DEPOT_NAMES |
257 DEPOT_DEPS_NAME = dict(DEPOT_DEPS_NAME.items() + | 258 DEPOT_DEPS_NAME = dict(DEPOT_DEPS_NAME.items() + |
258 depot_info.items()) | 259 depot_info.items()) |
259 DEPOT_NAMES = DEPOT_DEPS_NAME.keys() | 260 DEPOT_NAMES = DEPOT_DEPS_NAME.keys() |
260 | 261 |
261 | 262 |
262 def ConfidenceScore(good_results_lists, bad_results_lists): | 263 def ConfidenceScore(good_results_lists, bad_results_lists): |
263 """Calculates a confidence percentage. | 264 """Calculates a confidence score. |
264 | 265 |
265 This is calculated based on how distinct the "good" and "bad" values are, | 266 This score is a percentage which represents our degree of confidence in the |
266 and how noisy the results are. More precisely, the confidence is the quotient | 267 proposition that the good results and bad results are distinct groups, and |
267 of the difference between the closest values across the good and bad groups | 268 their differences aren't due to chance alone. |
268 and the sum of the standard deviations of the good and bad groups. | |
269 | 269 |
270 TODO(qyearsley): Replace this confidence function with a function that | |
271 uses a Student's t-test. The confidence would be (1 - p-value), where | |
272 p-value is the probability of obtaining the given a set of good and bad | |
273 values just by chance. | |
274 | 270 |
275 Args: | 271 Args: |
276 good_results_lists: A list of lists of "good" result numbers. | 272 good_results_lists: A list of lists of "good" result numbers. |
277 bad_results_lists: A list of lists of "bad" result numbers. | 273 bad_results_lists: A list of lists of "bad" result numbers. |
278 | 274 |
279 Returns: | 275 Returns: |
280 A number between in the range [0, 100]. | 276 A number in the range [0, 100]. |
281 """ | 277 """ |
282 # Get the distance between the two groups. | 278 if not good_results_lists or not bad_results_lists: |
283 means_good = map(math_utils.Mean, good_results_lists) | 279 return 0.0 |
284 means_bad = map(math_utils.Mean, bad_results_lists) | |
285 bounds_good = (min(means_good), max(means_good)) | |
286 bounds_bad = (min(means_bad), max(means_bad)) | |
287 dist_between_groups = min( | |
288 math.fabs(bounds_bad[1] - bounds_good[0]), | |
289 math.fabs(bounds_bad[0] - bounds_good[1])) | |
290 | 280 |
291 # Get the sum of the standard deviations of the two groups. | 281 # Flatten the lists of results lists. |
292 good_results_flattened = sum(good_results_lists, []) | 282 sample1 = sum(good_results_lists, []) |
293 bad_results_flattened = sum(bad_results_lists, []) | 283 sample2 = sum(bad_results_lists, []) |
294 stddev_good = math_utils.StandardDeviation(good_results_flattened) | |
295 stddev_bad = math_utils.StandardDeviation(bad_results_flattened) | |
296 stddev_sum = stddev_good + stddev_bad | |
297 | 284 |
298 confidence = dist_between_groups / (max(0.0001, stddev_sum)) | 285 # The p-value is approximately the probability of obtaining the given set |
299 confidence = int(min(1.0, max(confidence, 0.0)) * 100.0) | 286 # of good and bad values just by chance. |
300 return confidence | 287 _, _, p_value = ttest.WelchsTTest(sample1, sample2) |
| 288 return 100.0 * (1.0 - p_value) |
301 | 289 |
302 | 290 |
303 def GetSHA1HexDigest(contents): | 291 def GetSHA1HexDigest(contents): |
304 """Returns the SHA-1 hex digest of the given contents.""" | 292 """Returns the SHA-1 hex digest of the given contents.""" |
305 return hashlib.sha1(contents).hexdigest() | 293 return hashlib.sha1(contents).hexdigest() |
306 | 294 |
307 | 295 |
308 def GetZipFileName(build_revision=None, target_arch='ia32', patch_sha=None): | 296 def GetZipFileName(build_revision=None, target_arch='ia32', patch_sha=None): |
309 """Gets the archive file name for the given revision.""" | 297 """Gets the archive file name for the given revision.""" |
310 def PlatformName(): | 298 def PlatformName(): |
(...skipping 3347 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3658 # bugs. If you change this, please update the perf dashboard as well. | 3646 # bugs. If you change this, please update the perf dashboard as well. |
3659 bisect_utils.OutputAnnotationStepStart('Results') | 3647 bisect_utils.OutputAnnotationStepStart('Results') |
3660 print 'Error: %s' % e.message | 3648 print 'Error: %s' % e.message |
3661 if opts.output_buildbot_annotations: | 3649 if opts.output_buildbot_annotations: |
3662 bisect_utils.OutputAnnotationStepClosed() | 3650 bisect_utils.OutputAnnotationStepClosed() |
3663 return 1 | 3651 return 1 |
3664 | 3652 |
3665 | 3653 |
3666 if __name__ == '__main__': | 3654 if __name__ == '__main__': |
3667 sys.exit(main()) | 3655 sys.exit(main()) |
OLD | NEW |