appengine/findit/waterfall/monitor_try_job_pipeline.py - Issue 1926473002: [Findit] Adding new fields to try job metadata and updating as soon as possible

Side by Side Diff: appengine/findit/waterfall/monitor_try_job_pipeline.py

Issue 1926473002: [Findit] Adding new fields to try job metadata and updating as soon as possible (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Addressing comments Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « appengine/findit/model/wf_try_job_data.py ('k') | appengine/findit/waterfall/schedule_try_job_pipeline.py » ('j') | appengine/findit/waterfall/schedule_try_job_pipeline.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from datetime import datetime	5 from datetime import datetime

6 import json	6 import json

7 import time	7 import time

8	8

9 from common.pipeline_wrapper import BasePipeline	9 from common.pipeline_wrapper import BasePipeline

10 from common.pipeline_wrapper import pipeline	10 from common.pipeline_wrapper import pipeline

(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 'reason': MonitorTryJobPipeline.UNKNOWN	103 'reason': MonitorTryJobPipeline.UNKNOWN

104 },	104 },

105 try_job_error.UNKNOWN)	105 try_job_error.UNKNOWN)

106	106

107 return None, None	107 return None, None

108	108

109 @staticmethod	109 @staticmethod

110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,	110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,

111 buildbucket_error, timed_out):	111 buildbucket_error, timed_out):

112 buildbucket_response = {}	112 buildbucket_response = {}

	113

113 if buildbucket_build:	114 if buildbucket_build:

114 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime(	115 try_job_data.request_time = (

115 buildbucket_build.request_time)	116 try_job_data.request_time or

	117 MonitorTryJobPipeline._MicrosecondsToDatetime(

	118 buildbucket_build.request_time))

116 # If start_time is unavailable, fallback to request_time.	119 # If start_time is unavailable, fallback to request_time.

117 try_job_data.start_time = start_time or try_job_data.request_time	120 try_job_data.start_time = start_time or try_job_data.request_time

118 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(	121 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(

119 buildbucket_build.end_time)	122 buildbucket_build.end_time)

120 try_job_data.number_of_commits_analyzed = len(	123 try_job_data.number_of_commits_analyzed = len(

121 buildbucket_build.report.get('result', {}))	124 buildbucket_build.report.get('result', {}))

122 try_job_data.try_job_url = buildbucket_build.url

123 try_job_data.regression_range_size = buildbucket_build.report.get(	125 try_job_data.regression_range_size = buildbucket_build.report.get(

124 'metadata', {}).get('regression_range_size')	126 'metadata', {}).get('regression_range_size')

125 try_job_data.last_buildbucket_response = buildbucket_build.response	127 try_job_data.try_job_url = (

	128 try_job_data.try_job_url or buildbucket_build.url)
	stgao (2016/04/27 17:34:33): Why don't we use the one from buildbucket?
	lijeffrey (2016/04/27 22:21:03), replying to stgao: In case it's already set from line 229 when the build status first changes to STARTED, though it shouldn't make a difference. I'll change it back to just buildbucket_build.url.
126 buildbucket_response = buildbucket_build.response	129 buildbucket_response = buildbucket_build.response

	130 try_job_data.last_buildbucket_response = buildbucket_response

127	131

128 error_dict, error_code = MonitorTryJobPipeline._GetError(	132 error_dict, error_code = MonitorTryJobPipeline._GetError(

129 buildbucket_response, buildbucket_error, timed_out)	133 buildbucket_response, buildbucket_error, timed_out)

130	134

131 if error_dict:	135 if error_dict:

132 try_job_data.error = error_dict	136 try_job_data.error = error_dict

133 try_job_data.error_code = error_code	137 try_job_data.error_code = error_code

134	138

135 try_job_data.put()	139 try_job_data.put()

136	140

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 default_pipeline_wait_seconds = waterfall_config.GetTryJobSettings().get(	177 default_pipeline_wait_seconds = waterfall_config.GetTryJobSettings().get(

174 'server_query_interval_seconds')	178 'server_query_interval_seconds')

175 max_error_times = waterfall_config.GetTryJobSettings().get(	179 max_error_times = waterfall_config.GetTryJobSettings().get(

176 'allowed_response_error_times')	180 'allowed_response_error_times')

177 pipeline_wait_seconds = default_pipeline_wait_seconds	181 pipeline_wait_seconds = default_pipeline_wait_seconds

178 allowed_response_error_times = max_error_times	182 allowed_response_error_times = max_error_times

179	183

180 # TODO(chanli): Make sure total wait time equals to timeout_hours	184 # TODO(chanli): Make sure total wait time equals to timeout_hours

181 # regardless of retries.	185 # regardless of retries.

182 deadline = time.time() + timeout_hours * 60 * 60	186 deadline = time.time() + timeout_hours * 60 * 60

183 try_job_data = (WfTryJobData.Get(try_job_id) or	187 try_job_data = WfTryJobData.Get(try_job_id)

184 WfTryJobData.Create(try_job_id))

185 try_job_data.master_name = master_name

186 try_job_data.builder_name = builder_name

187 try_job_data.build_number = build_number

188 try_job_data.try_job_type = try_job_type

189

190 already_set_started = False	188 already_set_started = False

191 start_time = None	189 start_time = None

192 while True:	190 while True:

193 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]	191 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]

194 if error:	192 if error:

195 if allowed_response_error_times > 0:	193 if allowed_response_error_times > 0:

196 allowed_response_error_times -= 1	194 allowed_response_error_times -= 1

197 pipeline_wait_seconds += default_pipeline_wait_seconds	195 pipeline_wait_seconds += default_pipeline_wait_seconds

198 else: # pragma: no cover	196 else: # pragma: no cover

199 # Buildbucket has responded error more than 5 times, retry pipeline.	197 # Buildbucket has responded error more than 5 times, retry pipeline.

(...skipping 15 matching lines...) Expand all Loading...
215 allowed_response_error_times = max_error_times	213 allowed_response_error_times = max_error_times

216 pipeline_wait_seconds = default_pipeline_wait_seconds	214 pipeline_wait_seconds = default_pipeline_wait_seconds

217 if build.status == BuildbucketBuild.STARTED and not already_set_started:	215 if build.status == BuildbucketBuild.STARTED and not already_set_started:

218 # It is possible this branch is skipped if a fast build goes from	216 # It is possible this branch is skipped if a fast build goes from

219 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be	217 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be

220 # unavailable.	218 # unavailable.

221 start_time = self._MicrosecondsToDatetime(build.updated_time)	219 start_time = self._MicrosecondsToDatetime(build.updated_time)

222 self._UpdateTryJobResult(	220 self._UpdateTryJobResult(

223 BuildbucketBuild.STARTED, master_name, builder_name, build_number,	221 BuildbucketBuild.STARTED, master_name, builder_name, build_number,

224 try_job_type, try_job_id, build.url)	222 try_job_type, try_job_id, build.url)

	223

	224 # Update as much try job metadata as soon as possible to avoid data

	225 # loss in case of errors.

	226 try_job_data.start_time = start_time

	227 try_job_data.request_time = (

	228 MonitorTryJobPipeline._MicrosecondsToDatetime(build.request_time))

	229 try_job_data.try_job_url = build.url

	230 try_job_data.put()

	231

225 already_set_started = True	232 already_set_started = True

226	233

227 if time.time() > deadline: # pragma: no cover	234 if time.time() > deadline: # pragma: no cover

228 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)	235 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)

229 # Explicitly abort the whole pipeline.	236 # Explicitly abort the whole pipeline.

230 raise pipeline.Abort(	237 raise pipeline.Abort(

231 'Try job %s timed out after %d hours.' % (	238 'Try job %s timed out after %d hours.' % (

232 try_job_id, timeout_hours))	239 try_job_id, timeout_hours))

233	240

234 time.sleep(pipeline_wait_seconds) # pragma: no cover	241 time.sleep(pipeline_wait_seconds) # pragma: no cover

OLD	NEW