Chromium Code Reviews

Unified Diff: tools/bisect-perf-regression_test.py

Issue 413393002: Use Welch's t-test to calculate confidence scores in the bisect script. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Add Chromium copyright notice to ttest.py. Created 6 years, 5 months ago
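Background on the change: the old ConfidenceScore divided the distance between the closest group means by the sum of the standard deviations (see the removed test code below); the patch derives the score from Welch's t-test instead, using a ttest.py module that the patch also touches. The sketch below is not the code under review; it is a minimal illustration, assuming SciPy is available and assuming confidence is defined as 100 * (1 - p-value), of how such a score could be computed. The function name welch_confidence_score and the empty-group guard are illustrative choices, and single-point or zero-variance groups (both exercised by the new tests) would need extra handling that this sketch omits.

# Illustrative sketch only; not the reviewed implementation.
from scipy import stats  # assumption: SciPy available here for brevity


def welch_confidence_score(bad_value_lists, good_value_lists):
  """Returns a 0-100 score for how distinct two groups of samples are."""
  # Flatten the per-revision value lists into two flat samples.
  bad_sample = [v for values in bad_value_lists for v in values]
  good_sample = [v for values in good_value_lists for v in values]
  # An empty group cannot be compared at all, so report zero confidence.
  if not bad_sample or not good_sample:
    return 0.0
  # equal_var=False selects Welch's t-test (variances may differ).
  _t_statistic, p_value = stats.ttest_ind(bad_sample, good_sample,
                                          equal_var=False)
  # A small p-value means strong evidence that the groups differ.
  return 100.0 * (1.0 - p_value)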
Index: tools/bisect-perf-regression_test.py
diff --git a/tools/bisect-perf-regression_test.py b/tools/bisect-perf-regression_test.py
index 913a851bcd3e054898c7eb5167abeb3c44c0dcd5..d4e88d24f64a568a2ffaf31344396f1d5cc7f49d 100644
--- a/tools/bisect-perf-regression_test.py
+++ b/tools/bisect-perf-regression_test.py
@@ -31,36 +31,65 @@ class BisectPerfRegressionTest(unittest.TestCase):
"""Cleans up the test environment after each test method."""
pass
- def testConfidenceScore(self):
+ def testConfidenceScoreHigh(self):
"""Tests the confidence calculation."""
- bad_values = [[0, 1], [1, 2]]
- good_values = [[6, 7], [7, 8]]
- # Closest means are mean(1, 2) and mean(6, 7).
- distance = 6.5 - 1.5
- # Standard deviation of [n-1, n, n, n+1] is 0.8165.
- stddev_sum = 0.8165 + 0.8165
- # Expected confidence is an int in the range [0, 100].
- expected_confidence = min(100, int(100 * distance / float(stddev_sum)))
- self.assertEqual(
- expected_confidence,
- bisect_perf_module.ConfidenceScore(bad_values, good_values))
+ bad_values = [[0, 1, 1], [1, 2, 2]]
+ good_values = [[1, 2, 2], [3, 3, 4]]
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(95.0, confidence)
- def testConfidenceScoreZeroConfidence(self):
+ def testConfidenceScoreNotSoHigh(self):
+ """Tests the confidence calculation."""
+ bad_values = [[0, 1, 1], [1, 2, 2]]
+ good_values = [[1, 1, 1], [3, 3, 4]]
+ # The good and bad groups are closer together than in the above test,
+ # so the confidence that they're different is a little lower.
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(80.0, confidence)
+
+ def testConfidenceScoreZero(self):
"""Tests the confidence calculation when it's expected to be 0."""
- bad_values = [[0, 1], [1, 2], [4, 5], [0, 2]]
- good_values = [[4, 5], [6, 7], [7, 8]]
- # Both groups have value lists with means of 4.5, which means distance
- # between groups is zero, and thus confidence is zero.
- self.assertEqual(
- 0, bisect_perf_module.ConfidenceScore(bad_values, good_values))
+ bad_values = [[4, 5], [7, 6], [8, 7]]
+ good_values = [[8, 7], [6, 7], [5, 4]]
+ # The good and bad sets contain the same values, so the confidence that
+ # they're different should be zero.
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(0.0, confidence)
- def testConfidenceScoreMaxConfidence(self):
- """Tests the confidence calculation when it's expected to be 100."""
+ def testConfidenceScoreVeryHigh(self):
+ """Tests the confidence calculation when it's expected to be very high."""
bad_values = [[1, 1], [1, 1]]
good_values = [[1.2, 1.2], [1.2, 1.2]]
- # Standard deviation in both groups is zero, so confidence is 100.
- self.assertEqual(
- 100, bisect_perf_module.ConfidenceScore(bad_values, good_values))
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(99.9, confidence)
+
+ def testConfidenceScoreImbalance(self):
+ """Tests the confidence calculation when one set of numbers is small."""
+ bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+ good_values = [[1.4]]
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(80.0, confidence)
+
+ def testConfidenceScoreEmptyGroup(self):
+ """Tests the confidence calculation when one set of numbers is empty."""
+ bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+ good_values = []
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(0.0, confidence)
+
+ def testConfidenceScoreFunctionalTestResultsInconsistent(self):
+ """Tests the confidence calculation when 0/1 results are mixed between the groups."""
+ bad_values = [[1], [1], [0], [1], [1], [1], [0], [1]]
+ good_values = [[0], [0], [1], [0], [1], [0]]
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(80.0, confidence)
+
+ def testConfidenceScoreFunctionalTestResultsConsistent(self):
+ """Tests the confidence calculation when 0/1 results separate cleanly by group."""
+ bad_values = [[1], [1], [1], [1], [1], [1], [1], [1]]
+ good_values = [[0], [0], [0], [0], [0], [0]]
+ confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+ self.assertEqual(99.9, confidence)
def testParseDEPSStringManually(self):
"""Tests DEPS parsing."""

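For reference when reading the expectations above: Welch's t-test compares the means of two samples without assuming equal variances. The helper below writes out the textbook t statistic and the Welch-Satterthwaite degrees of freedom in plain Python; the names are illustrative, and this is a sketch of the standard formulas rather than the patch's ttest.py.

import math


def welch_t_and_df(sample1, sample2):
  """Returns (t statistic, degrees of freedom) for two samples."""
  n1, n2 = len(sample1), len(sample2)
  mean1 = sum(sample1) / float(n1)
  mean2 = sum(sample2) / float(n2)
  # Unbiased sample variances (zero-variance samples, as in
  # testConfidenceScoreVeryHigh above, would need special-casing).
  var1 = sum((x - mean1) ** 2 for x in sample1) / float(n1 - 1)
  var2 = sum((x - mean2) ** 2 for x in sample2) / float(n2 - 1)
  se1, se2 = var1 / n1, var2 / n2
  t = (mean1 - mean2) / math.sqrt(se1 + se2)
  # Welch-Satterthwaite approximation of the degrees of freedom.
  df = (se1 + se2) ** 2 / (se1 ** 2 / (n1 - 1) + se2 ** 2 / (n2 - 1))
  return t, df

From t and df one obtains a two-tailed p-value (via a t-distribution table or CDF); the smaller that p-value, the higher the reported confidence that the good and bad samples genuinely differ.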