appengine/findit/waterfall/monitor_try_job_pipeline.py - Issue 1926473002: [Findit] Adding new fields to try job metadata and updating as soon as possible

Side by Side Diff: appengine/findit/waterfall/monitor_try_job_pipeline.py

Issue 1926473002: [Findit] Adding new fields to try job metadata and updating as soon as possible (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Addressing comments Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from datetime import datetime	5 from datetime import datetime

6 import json	6 import json

7 import time	7 import time

8	8

9 from common.pipeline_wrapper import BasePipeline	9 from common.pipeline_wrapper import BasePipeline

10 from common.pipeline_wrapper import pipeline	10 from common.pipeline_wrapper import pipeline

(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
103 'reason': MonitorTryJobPipeline.UNKNOWN	103 'reason': MonitorTryJobPipeline.UNKNOWN

104 },	104 },

105 try_job_error.UNKNOWN)	105 try_job_error.UNKNOWN)

106	106

107 return None, None	107 return None, None

108	108

109 @staticmethod	109 @staticmethod

110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,	110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,

111 buildbucket_error, timed_out):	111 buildbucket_error, timed_out):

112 buildbucket_response = {}	112 buildbucket_response = {}

	113

113 if buildbucket_build:	114 if buildbucket_build:

114 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime(	115 try_job_data.request_time = (

115 buildbucket_build.request_time)	116 try_job_data.request_time or

	117 MonitorTryJobPipeline._MicrosecondsToDatetime(

	118 buildbucket_build.request_time))

116 # If start_time is unavailable, fallback to request_time.	119 # If start_time is unavailable, fallback to request_time.

117 try_job_data.start_time = start_time or try_job_data.request_time	120 try_job_data.start_time = start_time or try_job_data.request_time

118 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(	121 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(

119 buildbucket_build.end_time)	122 buildbucket_build.end_time)

120 try_job_data.number_of_commits_analyzed = len(	123 try_job_data.number_of_commits_analyzed = len(

121 buildbucket_build.report.get('result', {}))	124 buildbucket_build.report.get('result', {}))

122 try_job_data.try_job_url = buildbucket_build.url

123 try_job_data.regression_range_size = buildbucket_build.report.get(	125 try_job_data.regression_range_size = buildbucket_build.report.get(

124 'metadata', {}).get('regression_range_size')	126 'metadata', {}).get('regression_range_size')

125 try_job_data.last_buildbucket_response = buildbucket_build.response	127 try_job_data.try_job_url = buildbucket_build.url

126 buildbucket_response = buildbucket_build.response	128 buildbucket_response = buildbucket_build.response

	129 try_job_data.last_buildbucket_response = buildbucket_response

127	130

128 error_dict, error_code = MonitorTryJobPipeline._GetError(	131 error_dict, error_code = MonitorTryJobPipeline._GetError(

129 buildbucket_response, buildbucket_error, timed_out)	132 buildbucket_response, buildbucket_error, timed_out)

130	133

131 if error_dict:	134 if error_dict:

132 try_job_data.error = error_dict	135 try_job_data.error = error_dict

133 try_job_data.error_code = error_code	136 try_job_data.error_code = error_code

134	137

135 try_job_data.put()	138 try_job_data.put()

136	139

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
173 default_pipeline_wait_seconds = waterfall_config.GetTryJobSettings().get(	176 default_pipeline_wait_seconds = waterfall_config.GetTryJobSettings().get(

174 'server_query_interval_seconds')	177 'server_query_interval_seconds')

175 max_error_times = waterfall_config.GetTryJobSettings().get(	178 max_error_times = waterfall_config.GetTryJobSettings().get(

176 'allowed_response_error_times')	179 'allowed_response_error_times')

177 pipeline_wait_seconds = default_pipeline_wait_seconds	180 pipeline_wait_seconds = default_pipeline_wait_seconds

178 allowed_response_error_times = max_error_times	181 allowed_response_error_times = max_error_times

179	182

180 # TODO(chanli): Make sure total wait time equals to timeout_hours	183 # TODO(chanli): Make sure total wait time equals to timeout_hours

181 # regardless of retries.	184 # regardless of retries.

182 deadline = time.time() + timeout_hours * 60 * 60	185 deadline = time.time() + timeout_hours * 60 * 60

183 try_job_data = (WfTryJobData.Get(try_job_id) or	186 try_job_data = WfTryJobData.Get(try_job_id)

184 WfTryJobData.Create(try_job_id))

185 try_job_data.master_name = master_name

186 try_job_data.builder_name = builder_name

187 try_job_data.build_number = build_number

188 try_job_data.try_job_type = try_job_type

189

190 already_set_started = False	187 already_set_started = False

191 start_time = None	188 start_time = None

192 while True:	189 while True:

193 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]	190 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]

194 if error:	191 if error:

195 if allowed_response_error_times > 0:	192 if allowed_response_error_times > 0:

196 allowed_response_error_times -= 1	193 allowed_response_error_times -= 1

197 pipeline_wait_seconds += default_pipeline_wait_seconds	194 pipeline_wait_seconds += default_pipeline_wait_seconds

198 else: # pragma: no cover	195 else: # pragma: no cover

199 # Buildbucket has responded error more than 5 times, retry pipeline.	196 # Buildbucket has responded error more than 5 times, retry pipeline.

(...skipping 15 matching lines...) Expand all Loading...
215 allowed_response_error_times = max_error_times	212 allowed_response_error_times = max_error_times

216 pipeline_wait_seconds = default_pipeline_wait_seconds	213 pipeline_wait_seconds = default_pipeline_wait_seconds

217 if build.status == BuildbucketBuild.STARTED and not already_set_started:	214 if build.status == BuildbucketBuild.STARTED and not already_set_started:

218 # It is possible this branch is skipped if a fast build goes from	215 # It is possible this branch is skipped if a fast build goes from

219 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be	216 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be

220 # unavailable.	217 # unavailable.

221 start_time = self._MicrosecondsToDatetime(build.updated_time)	218 start_time = self._MicrosecondsToDatetime(build.updated_time)

222 self._UpdateTryJobResult(	219 self._UpdateTryJobResult(

223 BuildbucketBuild.STARTED, master_name, builder_name, build_number,	220 BuildbucketBuild.STARTED, master_name, builder_name, build_number,

224 try_job_type, try_job_id, build.url)	221 try_job_type, try_job_id, build.url)

	222

	223 # Update as much try job metadata as soon as possible to avoid data

	224 # loss in case of errors.

	225 try_job_data.start_time = start_time

	226 try_job_data.request_time = (

	227 MonitorTryJobPipeline._MicrosecondsToDatetime(build.request_time))

	228 try_job_data.try_job_url = build.url

	229 try_job_data.put()

	230

225 already_set_started = True	231 already_set_started = True

226	232

227 if time.time() > deadline: # pragma: no cover	233 if time.time() > deadline: # pragma: no cover

228 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)	234 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)

229 # Explicitly abort the whole pipeline.	235 # Explicitly abort the whole pipeline.

230 raise pipeline.Abort(	236 raise pipeline.Abort(

231 'Try job %s timed out after %d hours.' % (	237 'Try job %s timed out after %d hours.' % (

232 try_job_id, timeout_hours))	238 try_job_id, timeout_hours))

233	239

234 time.sleep(pipeline_wait_seconds) # pragma: no cover	240 time.sleep(pipeline_wait_seconds) # pragma: no cover

OLD	NEW

« no previous file with comments | « appengine/findit/model/wf_try_job_data.py ('k') | appengine/findit/waterfall/schedule_try_job_pipeline.py » ('j') | no next file with comments »