Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1375)

Side by Side Diff: appengine/findit/waterfall/monitor_try_job_pipeline.py

Issue 1926473002: [Findit] Adding new fields to try job metadata and updating as soon as possible (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Addressing comments Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from datetime import datetime 5 from datetime import datetime
6 import json 6 import json
7 import time 7 import time
8 8
9 from common.pipeline_wrapper import BasePipeline 9 from common.pipeline_wrapper import BasePipeline
10 from common.pipeline_wrapper import pipeline 10 from common.pipeline_wrapper import pipeline
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 'reason': MonitorTryJobPipeline.UNKNOWN 103 'reason': MonitorTryJobPipeline.UNKNOWN
104 }, 104 },
105 try_job_error.UNKNOWN) 105 try_job_error.UNKNOWN)
106 106
107 return None, None 107 return None, None
108 108
109 @staticmethod 109 @staticmethod
110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, 110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,
111 buildbucket_error, timed_out): 111 buildbucket_error, timed_out):
112 buildbucket_response = {} 112 buildbucket_response = {}
113
113 if buildbucket_build: 114 if buildbucket_build:
114 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( 115 try_job_data.request_time = (
115 buildbucket_build.request_time) 116 try_job_data.request_time or
117 MonitorTryJobPipeline._MicrosecondsToDatetime(
118 buildbucket_build.request_time))
116 # If start_time is unavailable, fallback to request_time. 119 # If start_time is unavailable, fallback to request_time.
117 try_job_data.start_time = start_time or try_job_data.request_time 120 try_job_data.start_time = start_time or try_job_data.request_time
118 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( 121 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(
119 buildbucket_build.end_time) 122 buildbucket_build.end_time)
120 try_job_data.number_of_commits_analyzed = len( 123 try_job_data.number_of_commits_analyzed = len(
121 buildbucket_build.report.get('result', {})) 124 buildbucket_build.report.get('result', {}))
122 try_job_data.try_job_url = buildbucket_build.url
123 try_job_data.regression_range_size = buildbucket_build.report.get( 125 try_job_data.regression_range_size = buildbucket_build.report.get(
124 'metadata', {}).get('regression_range_size') 126 'metadata', {}).get('regression_range_size')
125 try_job_data.last_buildbucket_response = buildbucket_build.response 127 try_job_data.try_job_url = (
128 try_job_data.try_job_url or buildbucket_build.url)
stgao 2016/04/27 17:34:33 Why don't we use the one from buildbucket?
lijeffrey 2016/04/27 22:21:03 In case it's already set from line 229 when the bu... [comment truncated in this view]
126 buildbucket_response = buildbucket_build.response 129 buildbucket_response = buildbucket_build.response
130 try_job_data.last_buildbucket_response = buildbucket_response
127 131
128 error_dict, error_code = MonitorTryJobPipeline._GetError( 132 error_dict, error_code = MonitorTryJobPipeline._GetError(
129 buildbucket_response, buildbucket_error, timed_out) 133 buildbucket_response, buildbucket_error, timed_out)
130 134
131 if error_dict: 135 if error_dict:
132 try_job_data.error = error_dict 136 try_job_data.error = error_dict
133 try_job_data.error_code = error_code 137 try_job_data.error_code = error_code
134 138
135 try_job_data.put() 139 try_job_data.put()
136 140
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 default_pipeline_wait_seconds = waterfall_config.GetTryJobSettings().get( 177 default_pipeline_wait_seconds = waterfall_config.GetTryJobSettings().get(
174 'server_query_interval_seconds') 178 'server_query_interval_seconds')
175 max_error_times = waterfall_config.GetTryJobSettings().get( 179 max_error_times = waterfall_config.GetTryJobSettings().get(
176 'allowed_response_error_times') 180 'allowed_response_error_times')
177 pipeline_wait_seconds = default_pipeline_wait_seconds 181 pipeline_wait_seconds = default_pipeline_wait_seconds
178 allowed_response_error_times = max_error_times 182 allowed_response_error_times = max_error_times
179 183
180 # TODO(chanli): Make sure total wait time equals to timeout_hours 184 # TODO(chanli): Make sure total wait time equals to timeout_hours
181 # regardless of retries. 185 # regardless of retries.
182 deadline = time.time() + timeout_hours * 60 * 60 186 deadline = time.time() + timeout_hours * 60 * 60
183 try_job_data = (WfTryJobData.Get(try_job_id) or 187 try_job_data = WfTryJobData.Get(try_job_id)
184 WfTryJobData.Create(try_job_id))
185 try_job_data.master_name = master_name
186 try_job_data.builder_name = builder_name
187 try_job_data.build_number = build_number
188 try_job_data.try_job_type = try_job_type
189
190 already_set_started = False 188 already_set_started = False
191 start_time = None 189 start_time = None
192 while True: 190 while True:
193 error, build = buildbucket_client.GetTryJobs([try_job_id])[0] 191 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]
194 if error: 192 if error:
195 if allowed_response_error_times > 0: 193 if allowed_response_error_times > 0:
196 allowed_response_error_times -= 1 194 allowed_response_error_times -= 1
197 pipeline_wait_seconds += default_pipeline_wait_seconds 195 pipeline_wait_seconds += default_pipeline_wait_seconds
198 else: # pragma: no cover 196 else: # pragma: no cover
199 # Buildbucket has responded error more than 5 times, retry pipeline. 197 # Buildbucket has responded error more than 5 times, retry pipeline.
(...skipping 15 matching lines...) Expand all
215 allowed_response_error_times = max_error_times 213 allowed_response_error_times = max_error_times
216 pipeline_wait_seconds = default_pipeline_wait_seconds 214 pipeline_wait_seconds = default_pipeline_wait_seconds
217 if build.status == BuildbucketBuild.STARTED and not already_set_started: 215 if build.status == BuildbucketBuild.STARTED and not already_set_started:
218 # It is possible this branch is skipped if a fast build goes from 216 # It is possible this branch is skipped if a fast build goes from
219 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be 217 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be
220 # unavailable. 218 # unavailable.
221 start_time = self._MicrosecondsToDatetime(build.updated_time) 219 start_time = self._MicrosecondsToDatetime(build.updated_time)
222 self._UpdateTryJobResult( 220 self._UpdateTryJobResult(
223 BuildbucketBuild.STARTED, master_name, builder_name, build_number, 221 BuildbucketBuild.STARTED, master_name, builder_name, build_number,
224 try_job_type, try_job_id, build.url) 222 try_job_type, try_job_id, build.url)
223
224 # Update as much try job metadata as soon as possible to avoid data
225 # loss in case of errors.
226 try_job_data.start_time = start_time
227 try_job_data.request_time = (
228 MonitorTryJobPipeline._MicrosecondsToDatetime(build.request_time))
229 try_job_data.try_job_url = build.url
230 try_job_data.put()
231
225 already_set_started = True 232 already_set_started = True
226 233
227 if time.time() > deadline: # pragma: no cover 234 if time.time() > deadline: # pragma: no cover
228 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) 235 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)
229 # Explicitly abort the whole pipeline. 236 # Explicitly abort the whole pipeline.
230 raise pipeline.Abort( 237 raise pipeline.Abort(
231 'Try job %s timed out after %d hours.' % ( 238 'Try job %s timed out after %d hours.' % (
232 try_job_id, timeout_hours)) 239 try_job_id, timeout_hours))
233 240
234 time.sleep(pipeline_wait_seconds) # pragma: no cover 241 time.sleep(pipeline_wait_seconds) # pragma: no cover
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698