Chromium Code Reviews

Index: log_parser/process_log.py
===================================================================
--- log_parser/process_log.py (revision 25340)
+++ log_parser/process_log.py (working copy)
@@ -37,7 +37,7 @@
 # ...
 # },
 # ...,
-# "loaded": true
+# "load": true
 # }
 #
 # PERFID (string):
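
For orientation, the perf_expectations file this patch parses might look like the sketch below. The slave/test/graph/result names and numbers are invented for illustration; only the key layout (described in the docstring added further down) and the "load" switch come from the patch itself.

    {
      "xp-release/moz/times/t": {"delta": 2.5, "var": 1.0},
      "xp-release/moz/vm_rss_f_r/vm_rss_f_r": {"delta": 150.0, "var": 300.0},
      "load": true
    }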
@@ -118,9 +118,8 @@
     # Performance regression/speedup alerts.
     self._perf_name = perf_name
-    self._actual_performance = None
-    self._expected_performance = None
-    self._result_types = []
+    self._actual_performance = {}
+    self._expected_performance = {}
     self._perf_regress = []
     self._var_regress = []
     self._perf_improve = []
@@ -129,6 +128,26 @@
     # The revision isn't known until the Process() call.
     self._revision = -1
 
+  def LoadPerformanceExpectationsGroup(self, perf_data):
+    """All keys in perf_expectations have 4 components:
+    slave/test/graph/result
+
+    LoadPerformanceExpectationsGroup finds all keys that match the initial
+    portion of the string ("slave/test") and adds the graph and result
+    portions to the expected performance structure.
+    """
+
+    for perf_key in perf_data.keys():
+      m = re.match(r"^" + self._perf_name + r"/(\w+)/(\w+)$", perf_key)
+      if not m:
+        continue
+
+      graph_name = m.group(1)
+      result_type = m.group(2)
+
+      self._expected_performance.setdefault(graph_name, {})
+      self._expected_performance[graph_name][result_type] = perf_data[perf_key]
+
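
Given self._perf_name = "xp-release/moz" and the hypothetical expectations file sketched above, the loop would leave the structure below (illustrative values only):

    self._expected_performance = {
        'times': {'t': {'delta': 2.5, 'var': 1.0}},
        'vm_rss_f_r': {'vm_rss_f_r': {'delta': 150.0, 'var': 300.0}},
    }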
   def LoadPerformanceExpectations(self):
     self._expected = {}
     try:
@@ -136,10 +155,10 @@
     except IOError, e:
       raise
-    perf_list = []
+    perf_data = {}
     if perf_file:
       try:
-        perf_list = simplejson.load(perf_file)
+        perf_data = simplejson.load(perf_file)
       except ValueError:
         perf_file.seek(0)
         logging.error("Error parsing %s: '%s'" % (PERF_EXPECTATIONS,
@@ -147,9 +166,43 @@
     perf_file.close()
     # Find this perf/test entry
-    if perf_list and perf_list.has_key(self._perf_name):
-      self._expected_performance = perf_list[self._perf_name]
+    if perf_data and perf_data.has_key('load') and perf_data['load']:
+      self.LoadPerformanceExpectationsGroup(perf_data)
+    else:
+      logging.error("perf expectations not loaded: 'load' is not set")
+
+  def TrackActualPerformance(self, graph_name=None, result_type=None,
+                             value=None, stddev=None):
+    """Set actual performance data when we come across useful values.
+
+    result_type will be of the form "RESULTTYPE" or "RESULTTYPE_ref".
+    A trace with _ref in its name refers to a reference build.
+
+    Common result types for page cyclers: t, vm_rss_f_r, IO_b_b, etc.
+    A test's result types vary between test types.  Currently, a test
+    only needs to output the appropriate text format to embed a new
+    result type.
+    """
+
+    self._actual_performance.setdefault(graph_name, {})
+    m = re.match(r"^(\w+)_ref$", result_type)
+    if m:  # a test on the reference build
+      result_type = m.group(1)
+
+    self._actual_performance[graph_name].setdefault(result_type, {})
+    actual = self._actual_performance[graph_name][result_type]
+
+    if m:  # a test on the reference build
+      actual['ref'] = value
+    else:  # a test on the current build
+      actual['test'] = value
+      actual['var'] = stddev
+
+    # If we have both the current and ref results, compute the delta for this
+    # result type.
+    if 'test' in actual and 'ref' in actual:
+      actual['delta'] = actual['test'] - actual['ref']
+
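
As a sketch of the flow (graph name and values invented), two calls for the same graph/result pair fill in both sides and trigger the delta computation:

    self.TrackActualPerformance(graph_name='times', result_type='t_ref',
                                value=100.0, stddev=2.0)
    self.TrackActualPerformance(graph_name='times', result_type='t',
                                value=103.0, stddev=2.5)
    # self._actual_performance['times']['t'] is now:
    # {'ref': 100.0, 'test': 103.0, 'var': 2.5, 'delta': 3.0}

Note that the stddev argument of the _ref call is dropped: only the current build's variance is recorded in 'var'.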
   def PerformanceChangesAsText(self):
     text = []
@@ -167,44 +220,60 @@
     return text
 
-  def PerformanceChanges(self):
-    # Load performance expectations for this test.
-    self.LoadPerformanceExpectations()
+  def ComparePerformance(self, graph_name, result_type):
+    # Skip graphs and result types we don't expect values for.
+    if (graph_name not in self._expected_performance or
+        result_type not in self._expected_performance[graph_name]):
+      return
 
-    # Return if no performance expectations or results were found.
-    if not self._expected_performance or not self._actual_performance:
-      return []
+    expected = self._expected_performance[graph_name][result_type]
+    actual = self._actual_performance[graph_name][result_type]
+    graph_result = graph_name + '/' + result_type
 
-    # Compare actual and expected results.
-    for type in self._result_types:
-      if not (self._expected_performance.has_key(type) and
-              self._actual_performance.has_key(type)):
-        # Skip result types we don't know about.
-        continue
+    # Skip result types that didn't calculate a delta.
+    if 'delta' not in actual:
+      return
 
-      expect = self._expected_performance[type]
-      actual = self._actual_performance[type]
+    # Raise an exception when delta/var are missing from the expectations.
+    if 'delta' not in expected or 'var' not in expected:

M-A Ruel  2009/09/03 23:43:41
nit: Why do you care at all if you want to throw a

+      missing = []
+      if 'delta' not in expected:
+        missing.append('delta')
+      if 'var' not in expected:
+        missing.append('var')
+      msg = "%s/%s expectations missing %s" % (self._perf_name, graph_result,
+                                               ', '.join(missing))
+      raise Exception(msg)
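
With the invented names from earlier, an expectations entry lacking both fields would raise:

    Exception: xp-release/moz/times/t expectations missing delta, var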
-      # Set the high and low performance and variance tolerances.  The actual
-      # delta and variance needs to be within 50% above and below this range to
-      # keep the performance test green.  If the results fall above or below
-      # this range, the test will go red (signaling a regression) or orange
-      # (signaling a speedup).
-      high_perf = (expect['delta'] + 1.5*expect['var'])
-      low_perf = (expect['delta'] - 1.5*expect['var'])
-      high_var = 1.5*expect['var']
-      low_var = 0.5*expect['var']
+    # Set the high and low performance and variance tolerances.  To keep the
+    # performance test green, the actual variance must stay within 50% of the
+    # expected variance, and the actual delta must stay within the expected
+    # delta plus or minus 1.5 times the expected variance.  If the delta rises
+    # above that range the test goes red (signaling a regression); if it falls
+    # below, orange (signaling a speedup).
+    high_perf = (expected['delta'] + 1.5*expected['var'])
+    low_perf = (expected['delta'] - 1.5*expected['var'])
+    high_var = 1.5*expected['var']
+    low_var = 0.5*expected['var']
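
To make the tolerance arithmetic concrete, using the invented expectations of delta = 2.5 and var = 1.0:

    high_perf = 2.5 + 1.5 * 1.0  # 4.0: an actual delta above this is a regression
    low_perf  = 2.5 - 1.5 * 1.0  # 1.0: an actual delta below this is a speedup
    high_var  = 1.5 * 1.0        # 1.5: actual variance above this regresses
    low_var   = 0.5 * 1.0        # 0.5: actual variance below this improves

So an actual delta of 3.0 with variance 1.2 keeps the test green.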
-      if actual['delta'] > high_perf:
-        self._perf_regress.append(type)
-      elif actual['delta'] < low_perf:
-        self._perf_improve.append(type)
+    if actual['delta'] > high_perf:
+      self._perf_regress.append(graph_result)
+    elif actual['delta'] < low_perf:
+      self._perf_improve.append(graph_result)
 
-      if actual['var'] > high_var:
-        self._var_regress.append(type)
-      elif actual['var'] < low_var:
-        self._var_improve.append(type)
+    if actual['var'] > high_var:
+      self._var_regress.append(graph_result)
+    elif actual['var'] < low_var:
+      self._var_improve.append(graph_result)
 
+  def PerformanceChanges(self):
+    # Load performance expectations for this test.
+    self.LoadPerformanceExpectations()
+
+    # Compare actual and expected results.
+    for graph_name in self._actual_performance:
+      for result_type in self._actual_performance[graph_name]:
+        self.ComparePerformance(graph_name, result_type)
+
     return self.PerformanceChangesAsText()
   def evaluateCommand(self, cmd):
@@ -520,43 +589,10 @@
       graph.traces[trace_name] = trace
       self._graphs[graph_name] = graph
 
-      # Set actual performance data when we come across useful values.
-      #
-      # trace_name will be of the form "RESULTTYPE" or "RESULTTYPE_ref".
-      # A trace with _ref in its name refers to a reference build.
-      #
-      # Common result types for page cyclers: t, vm_rss_f_r, IO_b_b, etc.
-      # A test's result types vary between test types.  Currently, a test
-      # only needs to output the appropriate text format to embed a new
-      # result type.
+      # Store values in actual performance.
+      self.TrackActualPerformance(graph_name=graph_name, result_type=trace_name,
+                                  value=trace.value, stddev=trace.stddev)
 
-      m = re.match(r"^(\w+)_ref$", trace_name)
-      if m:
-        is_ref_build = True
-        result_type = m.group(1)
-      else:
-        is_ref_build = False
-        result_type = trace_name
-
-      if not self._actual_performance:
-        self._actual_performance = {}
-
-      if not self._actual_performance.has_key(result_type):
-        self._actual_performance[result_type] = {}
-
-      actual = self._actual_performance[result_type]
-      if is_ref_build:
-        actual['ref'] = trace.value
-      else:
-        actual['test'] = trace.value
-        actual['var'] = trace.stddev
-
-      # If we have both the test and ref results, compute the delta for this
-      # result type.
-      if actual.has_key('test') and actual.has_key('ref'):
-        self._result_types.append(result_type)
-        actual['delta'] = actual['test'] - actual['ref']
-
   def _CalculateStatistics(self, value_list, trace_name):
| """Returns a tuple (mean, standard deviation) from a list of values. |