Chromium Code Reviews

Unified Diff: log_parser/process_log.py

Issue 199005: Allow expectations for different graphs within tests.... (Closed) Base URL: svn://chrome-svn.corp.google.com/chrome/trunk/tools/buildbot/scripts/master/
Patch Set: '' Created 11 years, 3 months ago
 #!/usr/bin/python
 # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Defines various log processors used by buildbot steps.

 Current approach is to set an instance of log processor in
 the ProcessLogTestStep implementation and it will call process()
 method upon completion with full data from process stdio.

(...skipping 19 matching lines...)

 # perf_expectations.json holds performance expectations. It is a
 # JSON-formatted file with this format:
 #
 # {PERFID: {
 #    RESULTTYPE: {"delta": DELTA, "var": VAR},
 #    RESULTTYPE: {"delta": DELTA, "var": VAR},
 #    ...
 #  },
 #  ...,
-#  "loaded": true
+#  "load": true
 # }
 #
 # PERFID (string):
 #   Perf mapping identifier of the form "build-perf-name/test-name",
 #   (see factory/chromium_commands.py).
 #
 # RESULTTYPE (string):
 #   A particular trace within a test (i.e. t, vm_rss_f_r).
 #
 # DELTA (integer):
(...skipping 60 matching lines...)
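To make the schema concrete: the new LoadPerformanceExpectationsGroup added below matches flat, four-component top-level keys of the form "build-perf-name/test-name/graph/trace" (the nested {PERFID: {RESULTTYPE: ...}} sketch in the comment above would not match the regex as written). A minimal file in the form the loader reads might look like this; the perf id, trace name, and numbers are invented for illustration:

    import simplejson

    # Hypothetical perf_expectations.json contents; every identifier and
    # value below is made up for illustration.
    perf_data = simplejson.loads("""
    {
      "xp-release-dual-core/morejs/times/t": {"delta": 50, "var": 10},
      "load": true
    }
    """)
    assert perf_data['load']
    assert perf_data['xp-release-dual-core/morejs/times/t']['delta'] == 50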
     self._report_link = report_link
     if output_dir is None:
       output_dir = os.getcwd()
     elif output_dir.startswith('~'):
       output_dir = os.path.expanduser(output_dir)
     self._output_dir = output_dir
     self._matches = {}

     # Performance regression/speedup alerts.
     self._perf_name = perf_name
-    self._actual_performance = None
-    self._expected_performance = None
-    self._result_types = []
+    self._actual_performance = {}
+    self._expected_performance = {}
     self._perf_regress = []
     self._var_regress = []
     self._perf_improve = []
     self._var_improve = []

     # The revision isn't known until the Process() call.
     self._revision = -1

+  def LoadPerformanceExpectationsGroup(self, perf_data):
+    """All keys in perf_expectations have 4 components:
+       slave/test/graph/result
+
+    LoadPerformanceExpectationsGroup finds all keys that match the initial
+    portion of the string ("slave/test") and adds the graph and result
+    portions to the expected performance structure.
+    """
+
+    for perf_key in perf_data.keys():
+      m = re.match(r"^" + self._perf_name + "/(\w+)/(\w+)$", perf_key)
+      if not m:
+        continue
+
+      graph_name = m.group(1)
+      result_type = m.group(2)
+
+      self._expected_performance.setdefault(graph_name, {})
+      self._expected_performance[graph_name][result_type] = perf_data[perf_key]
+
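A quick sketch of how that regex decomposes a key, with an invented perf name standing in for self._perf_name:

    import re

    perf_name = "xp-release-dual-core/morejs"         # hypothetical self._perf_name
    perf_key = "xp-release-dual-core/morejs/times/t"  # slave/test/graph/result

    m = re.match(r"^" + perf_name + r"/(\w+)/(\w+)$", perf_key)
    assert m.group(1) == "times"  # graph portion
    assert m.group(2) == "t"      # result (trace) portion

    # Keys that don't extend this perf name, such as "load" or entries for
    # other slave/test pairs, fail the match and are skipped by the loop.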
   def LoadPerformanceExpectations(self):
     self._expected = {}
     try:
       perf_file = open(PERF_EXPECTATIONS, 'r')
     except IOError, e:
       raise

-    perf_list = []
+    perf_data = []
     if perf_file:
       try:
-        perf_list = simplejson.load(perf_file)
+        perf_data = simplejson.load(perf_file)
       except ValueError:
         perf_file.seek(0)
         logging.error("Error parsing %s: '%s'" % (PERF_EXPECTATIONS,
                                                   perf_file.read().strip()))
       perf_file.close()

     # Find this perf/test entry
-    if perf_list and perf_list.has_key(self._perf_name):
-      self._expected_performance = perf_list[self._perf_name]
+    if perf_data and perf_data.has_key('load') and perf_data['load']:
+      self.LoadPerformanceExpectationsGroup(perf_data)
+    else:
+      logging.error("not loading perf expectations, perf_data is disabled")
+
+  def TrackActualPerformance(self, graph_name=None, result_type=None,
+                             value=None, stddev=None):
+    """Set actual performance data when we come across useful values.
+
+    result_type will be of the form "RESULTTYPE" or "RESULTTYPE_ref".
+    A trace with _ref in its name refers to a reference build.
+
+    Common result types for page cyclers: t, vm_rss_f_r, IO_b_b, etc.
+    A test's result types vary between test types. Currently, a test
+    only needs to output the appropriate text format to embed a new
+    result type.
+    """
+
+    self._actual_performance.setdefault(graph_name, {})
+    m = re.match(r"^(\w+)_ref$", result_type)
+    if m:  # a test on the reference build
+      result_type = m.group(1)
+
+    self._actual_performance[graph_name].setdefault(result_type, {})
+    actual = self._actual_performance[graph_name][result_type]
+
+    if m:  # a test on the reference build
+      actual['ref'] = value
+    else:  # a test on the current build
+      actual['test'] = value
+      actual['var'] = stddev
+
+    # If we have both the current and ref results, compute the delta for this
+    # result type.
+    if 'test' in actual and 'ref' in actual:
+      actual['delta'] = actual['test'] - actual['ref']

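To illustrate the _ref pairing above, a sketch with invented values (p stands for a processor instance whose _actual_performance starts empty): a current-build trace "t" and its reference-build twin "t_ref" land in the same bucket, and the delta appears once both are present:

    # Hypothetical values; p is a processor instance with
    # p._actual_performance == {} beforehand.
    p.TrackActualPerformance(graph_name='times', result_type='t',
                             value=1500.0, stddev=30.0)
    p.TrackActualPerformance(graph_name='times', result_type='t_ref',
                             value=1450.0)

    # p._actual_performance['times']['t'] is now:
    #   {'test': 1500.0, 'var': 30.0, 'ref': 1450.0, 'delta': 50.0}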
   def PerformanceChangesAsText(self):
     text = []

     if len(self._perf_regress) > 0:
       text.append("PERF_REGRESS: " + ', '.join(self._perf_regress))

     if len(self._var_regress) > 0:
       text.append("VAR_REGRESS: " + ', '.join(self._var_regress))

     if len(self._perf_improve) > 0:
       text.append("PERF_IMPROVE: " + ', '.join(self._perf_improve))

     if len(self._var_improve) > 0:
       text.append("VAR_IMPROVE: " + ', '.join(self._var_improve))

     return text

+  def ComparePerformance(self, graph_name, result_type):
+    # Skip graphs and result types we don't expect values for.
+    if (not graph_name in self._expected_performance or
+        not result_type in self._expected_performance[graph_name]):
+      return
+
+    expected = self._expected_performance[graph_name][result_type]
+    actual = self._actual_performance[graph_name][result_type]
+    graph_result = graph_name + '/' + result_type
+
+    # Skip result types that didn't calculate a delta.
+    if not 'delta' in actual:
+      return
+
+    # Exception when missing delta/var in expectations.
+    if not 'delta' in expected or not 'var' in expected:

    M-A Ruel 2009/09/03 23:43:41 nit: Why do you care at all if you want to throw a

+      missing = []
+      if not 'delta' in expected:
+        missing.append('delta')
+      if not 'var' in expected:
+        missing.append('var')
+      msg = "%s/%s expectations missing %s" % (self._perf_name, graph_result,
+                                               ', '.join(missing))
+      raise Exception(msg)
+
+    # Set the high and low performance and variance tolerances. The actual
+    # delta and variance needs to be within 50% above and below this range to
+    # keep the performance test green. If the results fall above or below
+    # this range, the test will go red (signaling a regression) or orange
+    # (signaling a speedup).
+    high_perf = (expected['delta'] + 1.5*expected['var'])
+    low_perf = (expected['delta'] - 1.5*expected['var'])
+    high_var = 1.5*expected['var']
+    low_var = 0.5*expected['var']
+
+    if actual['delta'] > high_perf:
+      self._perf_regress.append(graph_result)
+    elif actual['delta'] < low_perf:
+      self._perf_improve.append(graph_result)
+
+    if actual['var'] > high_var:
+      self._var_regress.append(graph_result)
+    elif actual['var'] < low_var:
+      self._var_improve.append(graph_result)
+
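Working the tolerance bands through with invented expectations of delta=50 and var=10:

    # Invented expectation values, purely for illustration.
    expected = {'delta': 50, 'var': 10}

    high_perf = expected['delta'] + 1.5 * expected['var']  # 65.0
    low_perf = expected['delta'] - 1.5 * expected['var']   # 35.0
    high_var = 1.5 * expected['var']                       # 15.0
    low_var = 0.5 * expected['var']                        # 5.0

    # An actual delta above 65 is recorded as a regression, below 35 as an
    # improvement; anything in [35, 65] keeps the test green. Variance is
    # judged the same way against [5, 15].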
   def PerformanceChanges(self):
     # Load performance expectations for this test.
     self.LoadPerformanceExpectations()

-    # Return if no performance expectations or results were found.
-    if not self._expected_performance or not self._actual_performance:
-      return []
-
     # Compare actual and expected results.
-    for type in self._result_types:
-      if not (self._expected_performance.has_key(type) and
-              self._actual_performance.has_key(type)):
-        # Skip result types we don't know about.
-        continue
-
-      expect = self._expected_performance[type]
-      actual = self._actual_performance[type]
-
-      # Set the high and low performance and variance tolerances. The actual
-      # delta and variance needs to be within 50% above and below this range to
-      # keep the performance test green. If the results fall above or below
-      # this range, the test will go red (signaling a regression) or orange
-      # (signaling a speedup).
-      high_perf = (expect['delta'] + 1.5*expect['var'])
-      low_perf = (expect['delta'] - 1.5*expect['var'])
-      high_var = 1.5*expect['var']
-      low_var = 0.5*expect['var']
-
-      if actual['delta'] > high_perf:
-        self._perf_regress.append(type)
-      elif actual['delta'] < low_perf:
-        self._perf_improve.append(type)
-
-      if actual['var'] > high_var:
-        self._var_regress.append(type)
-      elif actual['var'] < low_var:
-        self._var_improve.append(type)
+    for graph_name in self._actual_performance:
+      for result_type in self._actual_performance[graph_name]:
+        self.ComparePerformance(graph_name, result_type)

     return self.PerformanceChangesAsText()

   def evaluateCommand(self, cmd):
     if len(self._perf_regress) > 0 or len(self._var_regress) > 0:
       return builder.FAILURE

     if len(self._perf_improve) > 0 or len(self._var_improve) > 0:
       return builder.WARNINGS

(...skipping 296 matching lines...)
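Since the alert lists now hold graph-qualified names, a regression on trace t of graph times surfaces in the step text as "PERF_REGRESS: times/t", and evaluateCommand above maps any regression to builder.FAILURE (red) and any pure improvement to builder.WARNINGS (orange). A minimal sketch of that mapping, with stand-ins for buildbot's status constants (the real method presumably returns success when both checks pass, but that branch is elided above):

    # FAILURE, WARNINGS, and SUCCESS stand in for buildbot's builder-status
    # constants; the arguments mirror the processor's alert lists.
    FAILURE, WARNINGS, SUCCESS = 2, 1, 0

    def evaluate(perf_regress, var_regress, perf_improve, var_improve):
      if perf_regress or var_regress:
        return FAILURE   # red: any regression fails the step
      if perf_improve or var_improve:
        return WARNINGS  # orange: a speedup is flagged, not failed
      return SUCCESS

    assert evaluate(['times/t'], [], [], []) == FAILURE
    assert evaluate([], [], ['times/t'], []) == WARNINGS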
     else:
       try:
         trace.value = float(trace.value)
       except ValueError:
         logging.warning("Bad test output: '%s'" % trace.value.strip())
         return

     graph.traces[trace_name] = trace
     self._graphs[graph_name] = graph

-    # Set actual performance data when we come across useful values.
-    #
-    # trace_name will be of the form "RESULTTYPE" or "RESULTTYPE_ref".
-    # A trace with _ref in its name refers to a reference build.
-    #
-    # Common result types for page cyclers: t, vm_rss_f_r, IO_b_b, etc.
-    # A test's result types vary between test types. Currently, a test
-    # only needs to output the appropriate text format to embed a new
-    # result type.
-
-    m = re.match(r"^(\w+)_ref$", trace_name)
-    if m:
-      is_ref_build = True
-      result_type = m.group(1)
-    else:
-      is_ref_build = False
-      result_type = trace_name
-
-    if not self._actual_performance:
-      self._actual_performance = {}
-
-    if not self._actual_performance.has_key(result_type):
-      self._actual_performance[result_type] = {}
-
-    actual = self._actual_performance[result_type]
-    if is_ref_build:
-      actual['ref'] = trace.value
-    else:
-      actual['test'] = trace.value
-      actual['var'] = trace.stddev
-
-    # If we have both the test and ref results, compute the delta for this
-    # result type.
-    if actual.has_key('test') and actual.has_key('ref'):
-      self._result_types.append(result_type)
-      actual['delta'] = actual['test'] - actual['ref']
+    # Store values in actual performance.
+    self.TrackActualPerformance(graph_name=graph_name, result_type=trace_name,
+                                value=trace.value, stddev=trace.stddev)

   def _CalculateStatistics(self, value_list, trace_name):
     """Returns a tuple (mean, standard deviation) from a list of values.

     This method may be overridden by subclasses wanting a different standard
     deviation calculation (or some other sort of error value entirely).

     Args:
       value_list: the list of values to use in the calculation
       trace_name: the trace that produced the data (not used in the base
(...skipping 182 matching lines...)
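For reference, a minimal sketch of the kind of (mean, standard deviation) calculation the docstring above describes; the base implementation is elided here, so this is an assumption, written as a plain population standard deviation:

    import math

    def calculate_statistics(value_list):
      # Assumed stand-in for the elided base implementation: returns
      # (mean, population standard deviation) for a list of numbers.
      n = len(value_list)
      mean = sum(value_list) / float(n)
      variance = sum((v - mean) ** 2 for v in value_list) / float(n)
      return mean, math.sqrt(variance)

    mean, stddev = calculate_statistics([48.0, 50.0, 52.0])
    # mean == 50.0, stddev ~= 1.63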
                         FormatFloat(mean),
                         FormatFloat(stddev),
                         self._JoinWithSpacesAndNewLine(times)))

     filename = os.path.join(self._output_dir,
                             '%s_%s.dat' % (self._revision, trace_name))
     file = open(filename, 'w')
     file.write(''.join(file_data))
     file.close()
     os.chmod(filename, READABLE_FILE_PERMISSIONS)