Index: tools/bisect-perf-regression_test.py
diff --git a/tools/bisect-perf-regression_test.py b/tools/bisect-perf-regression_test.py
index 1f8fc5711a9ae50956c1138eae46ee3c3ca04951..8a1a250d4df59d977c2617f0cb884e58289afdd2 100644
--- a/tools/bisect-perf-regression_test.py
+++ b/tools/bisect-perf-regression_test.py
@@ -9,88 +9,56 @@ from auto_bisect import source_control as source_control_module
 # Special import necessary because filename contains dash characters.
 bisect_perf_module = __import__('bisect-perf-regression')
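+# (A plain "import bisect-perf-regression" statement would be a syntax error,
+# since dashes are not valid in Python identifiers.)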
-# Sample output for a performance test used in the results parsing tests below.
-RESULTS_OUTPUT = """RESULT write_operations: write_operations= 23089 count
-RESULT read_bytes_gpu: read_bytes_gpu= 35201 kb
-RESULT write_bytes_gpu: write_bytes_gpu= 542 kb
-RESULT telemetry_page_measurement_results: num_failed= 0 count
-RESULT telemetry_page_measurement_results: num_errored= 0 count
-*RESULT Total: Total_ref= %(value)s
-"""
-
-
-# Some private methods of the bisect-perf-regression module are tested below.
-# pylint: disable=W0212
+
 class BisectPerfRegressionTest(unittest.TestCase):
-  """Test case for top-level functions in the bisect-perf-regrssion module."""
-
-  def setUp(self):
-    """Sets up the test environment before each test method."""
-    pass
-
-  def tearDown(self):
-    """Cleans up the test environment after each test method."""
-    pass
-
-  def testConfidenceScoreHigh(self):
-    """Tests the confidence calculation."""
-    bad_values = [[0, 1, 1], [1, 2, 2]]
-    good_values = [[1, 2, 2], [3, 3, 4]]
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(95.0, confidence)
-
-  def testConfidenceScoreNotSoHigh(self):
-    """Tests the confidence calculation."""
-    bad_values = [[0, 1, 1], [1, 2, 2]]
-    good_values = [[1, 1, 1], [3, 3, 4]]
-    # The good and bad groups are closer together than in the above test,
-    # so the confidence that they're different is a little lower.
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(80.0, confidence)
-
-  def testConfidenceScoreZero(self):
-    """Tests the confidence calculation when it's expected to be 0."""
-    bad_values = [[4, 5], [7, 6], [8, 7]]
-    good_values = [[8, 7], [6, 7], [5, 4]]
+  """Test case for other functions and classes in bisect-perf-regression.py."""
+
+  def _AssertConfidence(self, score, bad_values, good_values):
+    """Checks whether the given sets of values have a given confidence score.
+
+    The score represents our confidence that the two sets of values wouldn't
+    be as different as they are just by chance; that is, that some real change
+    occurred between the two sets of values.
+
+    Args:
+      score: Expected confidence score.
+      bad_values: First list of numbers.
+      good_values: Second list of numbers.
+    """
+    # ConfidenceScore takes lists of lists; the inner lists are flattened, so
+    # each sample can be passed here as a single inner list.
+    confidence = bisect_perf_module.ConfidenceScore([bad_values], [good_values])
+    self.assertEqual(score, confidence)
+
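+  # A quick illustration of the flattening noted above: the same samples may
+  # also be passed split across inner lists, and the score should match the
+  # flattened form used in testConfidenceScore_HighConfidence.
+  def testConfidenceScore_InputIsFlattened(self):
+    confidence = bisect_perf_module.ConfidenceScore(
+        [[0, 1, 1], [1, 2, 2]], [[1, 2, 2], [3, 3, 4]])
+    self.assertEqual(95.0, confidence)
+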
+  def testConfidenceScore_ZeroConfidence(self):
     # The good and bad sets contain the same values, so the confidence that
     # they're different should be zero.
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(0.0, confidence)
-
-  def testConfidenceScoreVeryHigh(self):
-    """Tests the confidence calculation when it's expected to be high."""
-    bad_values = [[1, 1], [1, 1]]
-    good_values = [[1.2, 1.2], [1.2, 1.2]]
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(99.9, confidence)
-
-  def testConfidenceScoreImbalance(self):
-    """Tests the confidence calculation one set of numbers is small."""
-    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
-    good_values = [[1.4]]
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(80.0, confidence)
-
-  def testConfidenceScoreImbalance(self):
-    """Tests the confidence calculation one set of numbers is empty."""
-    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
-    good_values = []
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(0.0, confidence)
-
-  def testConfidenceScoreFunctionalTestResultsInconsistent(self):
-    """Tests the confidence calculation when the numbers are just 0 and 1."""
-    bad_values = [[1], [1], [0], [1], [1], [1], [0], [1]]
-    good_values = [[0], [0], [1], [0], [1], [0]]
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(80.0, confidence)
-
-  def testConfidenceScoreFunctionalTestResultsConsistent(self):
-    """Tests the confidence calculation when the numbers are 0 and 1."""
-    bad_values = [[1], [1], [1], [1], [1], [1], [1], [1]]
-    good_values = [[0], [0], [0], [0], [0], [0]]
-    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
-    self.assertEqual(99.9, confidence)
+    self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
+
+  def testConfidenceScore_MediumConfidence(self):
+    self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
+
+  def testConfidenceScore_HighConfidence(self):
+    self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
+
+  def testConfidenceScore_VeryHighConfidence(self):
+    # Confidence is high if the two sets of values have no internal variance.
+    self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
+    self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
+
+  def testConfidenceScore_ImbalancedSampleSize(self):
+    # The second sample contains only one number, so the confidence that the
+    # two samples are different is lower.
+    self._AssertConfidence(
+        80.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [1.4])
+
+  def testConfidenceScore_EmptySample(self):
+    # Confidence is zero if either or both samples are empty.
+    self._AssertConfidence(0.0, [], [])
+    self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
+    self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
+
+  def testConfidenceScore_FunctionalTestResults(self):
+    # Functional test results are binary: the values are just 0s and 1s.
+    self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
+    self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
   def testParseDEPSStringManually(self):
     """Tests DEPS parsing."""
@@ -108,171 +76,154 @@ class BisectPerfRegressionTest(unittest.TestCase):
       '74697cf2064c0a2c0d7e1b1b28db439286766a05'
 }"""
-    # Should only expect svn/git revisions to come through, and urls should be
+    # Should only expect SVN/git revisions to come through, and URLs should be
     # filtered out.
     expected_vars_dict = {
         'ffmpeg_hash': '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
         'webkit_rev': '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
         'angle_revision': '74697cf2064c0a2c0d7e1b1b28db439286766a05'
     }
+    # Testing private function.
+    # pylint: disable=W0212
     vars_dict = bisect_perf_module._ParseRevisionsFromDEPSFileManually(
        deps_file_contents)
     self.assertEqual(vars_dict, expected_vars_dict)
-  def testTryParseResultValuesFromOutputWithSingleValue(self):
+  def _AssertParseResult(self, expected_values, result_string):
+    """Asserts that the expected values are parsed from a sample RESULT line."""
+    results_template = ('RESULT other_chart: other_trace= 123 count\n'
+                        'RESULT my_chart: my_trace= %(value)s\n')
+    results = results_template % {'value': result_string}
+    metric = ['my_chart', 'my_trace']
+    # Testing private function.
+    # pylint: disable=W0212
+    values = bisect_perf_module._TryParseResultValuesFromOutput(metric, results)
+    self.assertEqual(expected_values, values)
+
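+  # For example, _AssertParseResult([66.88], '66.88 kb') renders the line
+  # 'RESULT my_chart: my_trace= 66.88 kb' and expects the parser to return
+  # [66.88] for the metric ['my_chart', 'my_trace'].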
+  def testTryParseResultValuesFromOutput_WithSingleValue(self):
     """Tests result pattern <*>RESULT <graph>: <trace>= <value>"""
-    metrics = ['Total', 'Total_ref']
-    self.assertEqual(
-        [66.88], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '66.88 kb'}))
-    self.assertEqual(
-        [66.88], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '66.88kb'}))
-    self.assertEqual(
-        [66.88], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': ' 66.88 '}))
-    self.assertEqual(
-        [-66.88], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': ' -66.88 kb'}))
-    self.assertEqual(
-        [66], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '66 kb'}))
-    self.assertEqual(
-        [.66], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '.66 kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '. kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': 'aaa kb'}))
-
-  def testTryParseResultValuesFromOutputWithMulitValue(self):
+    self._AssertParseResult([66.88], '66.88 kb')
+    self._AssertParseResult([66.88], '66.88 ')
+    self._AssertParseResult([-66.88], '-66.88 kb')
+    self._AssertParseResult([66], '66 kb')
+    self._AssertParseResult([0.66], '.66 kb')
+    self._AssertParseResult([], '. kb')
+    self._AssertParseResult([], 'aaa kb')
+
+  def testTryParseResultValuesFromOutput_WithMultiValue(self):
     """Tests result pattern <*>RESULT <graph>: <trace>= [<value>,<value>, ..]"""
-    metrics = ['Total', 'Total_ref']
-    self.assertEqual(
-        [66.88], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[66.88] kb'}))
-    self.assertEqual(
-        [66.88, 99.44], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[66.88, 99.44]kb'}))
-    self.assertEqual(
-        [66.88, 99.44], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[ 66.88, 99.44 ]'}))
-    self.assertEqual(
-        [-66.88, 99.44], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[-66.88,99.44] kb'}))
-    self.assertEqual(
-        [-66, 99], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[-66,99] kb'}))
-    self.assertEqual(
-        [-66, 99], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[-66,99,] kb'}))
-    self.assertEqual(
-        [.66, .99], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[.66,.99] kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[] kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '[-66,abc] kb'}))
+    self._AssertParseResult([66.88], '[66.88] kb')
+    self._AssertParseResult([66.88, 99.44], '[66.88, 99.44]kb')
+    self._AssertParseResult([66.88, 99.44], '[ 66.88, 99.44 ]')
+    self._AssertParseResult([-66.88, 99.44], '[-66.88, 99.44] kb')
+    self._AssertParseResult([-66, 99], '[-66,99] kb')
+    self._AssertParseResult([-66, 99], '[-66,99,] kb')
+    self._AssertParseResult([-66, 0.99], '[-66,.99] kb')
+    self._AssertParseResult([], '[] kb')
+    self._AssertParseResult([], '[-66,abc] kb')
   def testTryParseResultValuesFromOutputWithMeanStd(self):
     """Tests result pattern <*>RESULT <graph>: <trace>= {<mean, std}"""
-    metrics = ['Total', 'Total_ref']
-    self.assertEqual(
-        [33.22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{33.22, 3.6} kb'}))
-    self.assertEqual(
-        [33.22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{33.22,3.6}kb'}))
-    self.assertEqual(
-        [33.22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{33.22,3.6} kb'}))
-    self.assertEqual(
-        [33.22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{ 33.22,3.6 }kb'}))
-    self.assertEqual(
-        [-33.22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{-33.22,3.6}kb'}))
-    self.assertEqual(
-        [22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{22,6}kb'}))
-    self.assertEqual(
-        [.22], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{.22,6}kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{.22,6, 44}kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{}kb'}))
-    self.assertEqual(
-        [], bisect_perf_module._TryParseResultValuesFromOutput(
-            metrics, RESULTS_OUTPUT % {'value': '{XYZ}kb'}))
-
-  def testGetCompatibleCommand(self):
+    self._AssertParseResult([33.22], '{33.22, 3.6} kb')
+    self._AssertParseResult([33.22], '{33.22,3.6}kb')
+    self._AssertParseResult([33.22], '{33.22,3.6} kb')
+    self._AssertParseResult([33.22], '{ 33.22,3.6 }kb')
+    self._AssertParseResult([-33.22], '{-33.22,3.6}kb')
+    self._AssertParseResult([22], '{22,6}kb')
+    self._AssertParseResult([0.22], '{.22,6}kb')
+    self._AssertParseResult([], '{.22,6, 44}kb')
+    self._AssertParseResult([], '{}kb')
+    self._AssertParseResult([], '{XYZ}kb')
+
+  def _AssertCompatibleCommand(
+      self, expected_command, original_command, revision, target_platform):
+    """Checks that a Telemetry command is modified as expected.
+
+    The bisect script modifies the given command in order to get a Telemetry
+    command that works at the given revision; the browser name that Telemetry
+    expects has changed over time, and we want the bisect to work anyway.
+
+    Specifically, only for Android:
+      Through r274857, only android-chromium-testshell works.
+      From r276628 on, only android-chrome-shell works.
+      In between (r274858 to r276627), both browser names work.
+    """
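+    # A compact restatement of the mapping exercised by the tests below
+    # (illustrative only; the actual logic lives in GetCompatibleCommand):
+    #   revision <= 274857          -> --browser=android-chromium-testshell
+    #   274857 < revision < 276628  -> either browser name is left unchanged
+    #   revision >= 276628          -> --browser=android-chrome-shell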
     bisect_options = bisect_perf_module.BisectOptions()
     bisect_options.output_buildbot_annotations = None
     source_control = source_control_module.DetermineAndCreateSourceControl(
         bisect_options)
     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
         source_control, bisect_options)
-    bisect_instance.opts.target_platform = 'android'
-    depot = 'chromium'
-    # android-chrome-shell -> android-chromium-testshell
-    revision = 274857
-    git_revision = bisect_instance.source_control.ResolveToRevision(
-        revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
-    command = ('tools/perf/run_benchmark -v '
-               '--browser=android-chrome-shell page_cycler.intl_ja_zh')
-    expected_command = ('tools/perf/run_benchmark -v --browser='
-                        'android-chromium-testshell page_cycler.intl_ja_zh')
-    self.assertEqual(
-        bisect_instance.GetCompatibleCommand(command, git_revision, depot),
-        expected_command)
-
-    # android-chromium-testshell -> android-chromium-testshell
-    revision = 274858
+    bisect_instance.opts.target_platform = target_platform
     git_revision = bisect_instance.source_control.ResolveToRevision(
         revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
-    command = ('tools/perf/run_benchmark -v '
-               '--browser=android-chromium-testshell page_cycler.intl_ja_zh')
-    expected_command = ('tools/perf/run_benchmark -v --browser='
-                        'android-chromium-testshell page_cycler.intl_ja_zh')
-    self.assertEqual(
-        bisect_instance.GetCompatibleCommand(command, git_revision, depot),
-        expected_command)
-
-    # android-chromium-testshell -> android-chrome-shell
-    revision = 276628
-    git_revision = bisect_instance.source_control.ResolveToRevision(
-        revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
-    command = ('tools/perf/run_benchmark -v '
-               '--browser=android-chromium-testshell page_cycler.intl_ja_zh')
-    expected_command = ('tools/perf/run_benchmark -v --browser='
-                        'android-chrome-shell page_cycler.intl_ja_zh')
-    self.assertEqual(
-        bisect_instance.GetCompatibleCommand(command, git_revision, depot),
-        expected_command)
-    # android-chrome-shell -> android-chrome-shell
-    command = ('tools/perf/run_benchmark -v '
-               '--browser=android-chrome-shell page_cycler.intl_ja_zh')
-    expected_command = ('tools/perf/run_benchmark -v --browser='
-                        'android-chrome-shell page_cycler.intl_ja_zh')
-    self.assertEqual(
-        bisect_instance.GetCompatibleCommand(command, git_revision, depot),
-        expected_command)
-    # Not android platform
-    bisect_instance.opts.target_platform = 'chromium'
-    command = ('tools/perf/run_benchmark -v '
-               '--browser=release page_cycler.intl_ja_zh')
-    expected_command = ('tools/perf/run_benchmark -v --browser='
-                        'release page_cycler.intl_ja_zh')
-    self.assertEqual(
-        bisect_instance.GetCompatibleCommand(command, git_revision, depot),
-        expected_command)
+    depot = 'chromium'
+    command = bisect_instance.GetCompatibleCommand(
+        original_command, git_revision, depot)
+    self.assertEqual(expected_command, command)
+
+  def testGetCompatibleCommand_ChangeToTestShell(self):
+    # For revisions <= r274857, only android-chromium-testshell is used.
+    self._AssertCompatibleCommand(
+        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
+        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
+        274857, 'android')
+
+  def testGetCompatibleCommand_ChangeToShell(self):
+    # For revisions >= r276628, only android-chrome-shell can be used.
+    self._AssertCompatibleCommand(
+        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
+        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
+        276628, 'android')
+
+  def testGetCompatibleCommand_NoChange(self):
+    # For revisions < r276628, android-chromium-testshell can be used.
+    self._AssertCompatibleCommand(
+        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
+        'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
+        274858, 'android')
+    # For revisions > r274857, android-chrome-shell can be used.
+    self._AssertCompatibleCommand(
+        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
+        'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
+        274858, 'android')
+
+  def testGetCompatibleCommand_NonAndroidPlatform(self):
+    # On non-Android platforms, there's no need to change the Telemetry
+    # command, even at revisions (like r276628) where the Android browser
+    # name would have to change.
+    self._AssertCompatibleCommand(
+        'tools/perf/run_benchmark -v --browser=release foo',
+        'tools/perf/run_benchmark -v --browser=release foo',
+        276628, 'chromium')
+
+  # This method doesn't reference self, hence the pylint suppression below;
+  # the test passes as long as no error is thrown.
+  # pylint: disable=R0201
+  def testDryRun(self):
+    """Does a dry run of the bisect script.
+
+    This serves as a smoke test to catch errors in the basic execution of the
+    script.
+    """
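+    # The debug_ignore_* options below are what make this a dry run: as their
+    # names suggest, they skip source sync, building, and actually running
+    # the performance test.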
+    options_dict = {
+        'debug_ignore_build': True,
+        'debug_ignore_sync': True,
+        'debug_ignore_perf_test': True,
+        'command': 'fake_command',
+        'metric': 'fake/metric',
+        'good_revision': 280000,
+        'bad_revision': 280005,
+    }
+    bisect_options = bisect_perf_module.BisectOptions.FromDict(options_dict)
+    source_control = source_control_module.DetermineAndCreateSourceControl(
+        bisect_options)
+    bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
+        source_control, bisect_options)
+    results = bisect_instance.Run(bisect_options.command,
+                                  bisect_options.bad_revision,
+                                  bisect_options.good_revision,
+                                  bisect_options.metric)
+    bisect_instance.FormatAndPrintResults(results)
 if __name__ == '__main__':