Index: tools/bisect-perf-regression_test.py
diff --git a/tools/bisect-perf-regression_test.py b/tools/bisect-perf-regression_test.py
index 913a851bcd3e054898c7eb5167abeb3c44c0dcd5..d4e88d24f64a568a2ffaf31344396f1d5cc7f49d 100644
--- a/tools/bisect-perf-regression_test.py
+++ b/tools/bisect-perf-regression_test.py
@@ -31,36 +31,76 @@ class BisectPerfRegressionTest(unittest.TestCase):
"""Cleans up the test environment after each test method.""" |
pass |
- def testConfidenceScore(self): |
+ def testConfidenceScoreHigh(self): |
"""Tests the confidence calculation.""" |
- bad_values = [[0, 1], [1, 2]] |
- good_values = [[6, 7], [7, 8]] |
- # Closest means are mean(1, 2) and mean(6, 7). |
- distance = 6.5 - 1.5 |
- # Standard deviation of [n-1, n, n, n+1] is 0.8165. |
- stddev_sum = 0.8165 + 0.8165 |
- # Expected confidence is an int in the range [0, 100]. |
- expected_confidence = min(100, int(100 * distance / float(stddev_sum))) |
- self.assertEqual( |
- expected_confidence, |
- bisect_perf_module.ConfidenceScore(bad_values, good_values)) |
+ bad_values = [[0, 1, 1], [1, 2, 2]] |
+ good_values = [[1, 2, 2], [3, 3, 4]] |
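+    # The good values are generally higher than the bad values, so the
+    # confidence that the two groups are different should be fairly high.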
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(95.0, confidence)
 
-  def testConfidenceScoreZeroConfidence(self):
+  def testConfidenceScoreNotSoHigh(self):
+ """Tests the confidence calculation.""" |
+    bad_values = [[0, 1, 1], [1, 2, 2]]
+    good_values = [[1, 1, 1], [3, 3, 4]]
+    # The good and bad groups are closer together than in the above test,
+    # so the confidence that they're different is a little lower.
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreZero(self):
     """Tests the confidence calculation when it's expected to be 0."""
-    bad_values = [[0, 1], [1, 2], [4, 5], [0, 2]]
-    good_values = [[4, 5], [6, 7], [7, 8]]
-    # Both groups have value lists with means of 4.5, which means distance
-    # between groups is zero, and thus confidence is zero.
-    self.assertEqual(
-        0, bisect_perf_module.ConfidenceScore(bad_values, good_values))
+    bad_values = [[4, 5], [7, 6], [8, 7]]
+    good_values = [[8, 7], [6, 7], [5, 4]]
+    # The good and bad sets contain the same values, so the confidence that
+    # they're different should be zero.
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(0.0, confidence)
 
-  def testConfidenceScoreMaxConfidence(self):
-    """Tests the confidence calculation when it's expected to be 100."""
+  def testConfidenceScoreVeryHigh(self):
+ """Tests the confidence calculation when it's expected to be high.""" |
     bad_values = [[1, 1], [1, 1]]
     good_values = [[1.2, 1.2], [1.2, 1.2]]
-    # Standard deviation in both groups is zero, so confidence is 100.
-    self.assertEqual(
-        100, bisect_perf_module.ConfidenceScore(bad_values, good_values))
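+    # There is no variance within either group and the two groups do not
+    # overlap at all, so the confidence that they differ should be very high.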
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(99.9, confidence)
+
+  def testConfidenceScoreImbalance(self):
+ """Tests the confidence calculation one set of numbers is small.""" |
+    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+    good_values = [[1.4]]
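+    # With only a single sample in the good group, the confidence that the
+    # groups differ is limited.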
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreEmptyGroup(self):
+    """Tests the confidence calculation when one set of numbers is empty."""
+    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+    good_values = []
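+    # With no good values to compare against, the confidence should be zero.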
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(0.0, confidence)
+
+  def testConfidenceScoreFunctionalTestResultsInconsistent(self):
+ """Tests the confidence calculation when the numbers are just 0 and 1.""" |
+    bad_values = [[1], [1], [0], [1], [1], [1], [0], [1]]
+    good_values = [[0], [0], [1], [0], [1], [0]]
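+    # The 0/1 results are mixed within both groups, so the confidence that
+    # the two groups really differ is only moderate.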
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreFunctionalTestResultsConsistent(self):
+ """Tests the confidence calculation when the numbers are 0 and 1.""" |
+    bad_values = [[1], [1], [1], [1], [1], [1], [1], [1]]
+    good_values = [[0], [0], [0], [0], [0], [0]]
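+    # Every bad run returned 1 and every good run returned 0, so the
+    # confidence that the groups differ should be very high.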
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(99.9, confidence)
 
   def testParseDEPSStringManually(self):
     """Tests DEPS parsing."""