appengine/findit/waterfall/monitor_try_job_pipeline.py - Issue 1906293002: [Findit] Adding additional fields to try job metadata

Side by Side Diff: appengine/findit/waterfall/monitor_try_job_pipeline.py

Issue 1906293002: [Findit] Adding additional fields to try job metadata (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Addressing comments Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from datetime import datetime	5 from datetime import datetime

6 import time	6 import time

7	7

8 from common.pipeline_wrapper import BasePipeline	8 from common.pipeline_wrapper import BasePipeline

9 from common.pipeline_wrapper import pipeline	9 from common.pipeline_wrapper import pipeline

10 from common.waterfall import buildbucket_client	10 from common.waterfall import buildbucket_client

(...skipping 15 matching lines...) Expand all Loading...
26 TIMEOUT = 'TIMEOUT'	26 TIMEOUT = 'TIMEOUT'

27	27

28 @staticmethod	28 @staticmethod

29 def _MicrosecondsToDatetime(microseconds):	29 def _MicrosecondsToDatetime(microseconds):

30 """Returns a datetime given the number of microseconds, or None."""	30 """Returns a datetime given the number of microseconds, or None."""

31 if microseconds:	31 if microseconds:

32 return datetime.utcfromtimestamp(float(microseconds) / 1000000)	32 return datetime.utcfromtimestamp(float(microseconds) / 1000000)

33 return None	33 return None

34	34

35 @staticmethod	35 @staticmethod

36 def _UpdateTryJobMetadataForBuildError(try_job_data, error):	36 def _GetError(buildbucket_error, timed_out):

37 try_job_data.error = {	37 # TODO(lijeffrey): Currently only timeouts (Findit abandoned monitoring the

38 'message': error.message,	38 # try job after waiting too long for it to complete) and errors reported

39 'reason': error.reason	39 # directly in the buildbucket_client request are captured. Several other

40 }	40 # failures can be derived from the response in the build too which should

41 try_job_data.put()	41 # be determined here.

	42 if buildbucket_error:

	43 return {

	44 'message': buildbucket_error.message,

	45 'reason': buildbucket_error.reason

	46 }

	47

	48 if timed_out:

	49 return {

	50 'message': 'Try job monitoring was abandoned.',

	51 'reason': MonitorTryJobPipeline.TIMEOUT

	52 }

42	53

43 @staticmethod	54 @staticmethod

44 def _UpdateTryJobMetadataForCompletedBuild(try_job_data, build, start_time,	55 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,

45 timed_out=False):	56 buildbucket_error, timed_out):

46 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime(	57 if buildbucket_build:

47 build.request_time)	58 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime(

48 # If start_time is unavailable, fallback to request_time.	59 buildbucket_build.request_time)

49 try_job_data.start_time = start_time or try_job_data.request_time	60 # If start_time is unavailable, fallback to request_time.

50 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(	61 try_job_data.start_time = start_time or try_job_data.request_time

51 build.end_time)	62 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime(

52 try_job_data.number_of_commits_analyzed = len(	63 buildbucket_build.end_time)

53 build.report.get('result', {}))	64 try_job_data.number_of_commits_analyzed = len(

54 try_job_data.try_job_url = build.url	65 buildbucket_build.report.get('result', {}))

55 try_job_data.regression_range_size = build.report.get(	66 try_job_data.try_job_url = buildbucket_build.url

56 'metadata', {}).get('regression_range_size')	67 try_job_data.regression_range_size = buildbucket_build.report.get(

57 if timed_out:	68 'metadata', {}).get('regression_range_size')

58 try_job_data.error = {	69 try_job_data.last_buildbucket_response = buildbucket_build.response

59 'message': MonitorTryJobPipeline.TIMEOUT,	70

60 'reason': MonitorTryJobPipeline.TIMEOUT	71 error = MonitorTryJobPipeline._GetError(buildbucket_error, timed_out)

61 }	72

	73 if error:

	74 try_job_data.error = error

	75

62 try_job_data.put()	76 try_job_data.put()

63	77

64 def _UpdateTryJobResult(	78 def _UpdateTryJobResult(

65 self, status, master_name, builder_name, build_number, try_job_type,	79 self, status, master_name, builder_name, build_number, try_job_type,

66 try_job_id, try_job_url, result_content=None):	80 try_job_id, try_job_url, result_content=None):

67 """Updates try job result based on the returned try job status and result."""	81 """Updates try job result based on the returned try job status and result."""

68 result = {	82 result = {

69 'report': result_content,	83 'report': result_content,

70 'url': try_job_url,	84 'url': try_job_url,

71 'try_job_id': try_job_id,	85 'try_job_id': try_job_id,

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
104 pipeline_wait_seconds = default_pipeline_wait_seconds	118 pipeline_wait_seconds = default_pipeline_wait_seconds

105 allowed_response_error_times = max_error_times	119 allowed_response_error_times = max_error_times

106	120

107 # TODO(chanli): Make sure total wait time equals to timeout_hours	121 # TODO(chanli): Make sure total wait time equals to timeout_hours

108 # regardless of retries.	122 # regardless of retries.

109 deadline = time.time() + timeout_hours * 60 * 60	123 deadline = time.time() + timeout_hours * 60 * 60

110 try_job_data = (WfTryJobData.Get(try_job_id) or	124 try_job_data = (WfTryJobData.Get(try_job_id) or

111 WfTryJobData.Create(try_job_id))	125 WfTryJobData.Create(try_job_id))

112 try_job_data.master_name = master_name	126 try_job_data.master_name = master_name

113 try_job_data.builder_name = builder_name	127 try_job_data.builder_name = builder_name

	128 try_job_data.build_number = build_number

114 try_job_data.try_job_type = try_job_type	129 try_job_data.try_job_type = try_job_type

115	130

116 already_set_started = False	131 already_set_started = False

117 start_time = None	132 start_time = None

118 while True:	133 while True:

119 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]	134 error, build = buildbucket_client.GetTryJobs([try_job_id])[0]

120 if error:	135 if error:

121 if allowed_response_error_times > 0:	136 if allowed_response_error_times > 0:

122 allowed_response_error_times -= 1	137 allowed_response_error_times -= 1

123 pipeline_wait_seconds += default_pipeline_wait_seconds	138 pipeline_wait_seconds += default_pipeline_wait_seconds

124 else: # pragma: no cover	139 else: # pragma: no cover

125 # Buildbucket has responded error more than 5 times, retry pipeline.	140 # Buildbucket has responded error more than 5 times, retry pipeline.

126 self._UpdateTryJobMetadataForBuildError(try_job_data, error)	141 self._UpdateTryJobMetadata(

	142 try_job_data, start_time, build, error, False)

127 raise pipeline.Retry(	143 raise pipeline.Retry(

128 'Error "%s" occurred. Reason: "%s"' % (error.message,	144 'Error "%s" occurred. Reason: "%s"' % (error.message,

129 error.reason))	145 error.reason))

130 elif build.status == BuildbucketBuild.COMPLETED:	146 elif build.status == BuildbucketBuild.COMPLETED:

131 self._UpdateTryJobMetadataForCompletedBuild(	147 self._UpdateTryJobMetadata(

132 try_job_data, build, start_time)	148 try_job_data, start_time, build, error, False)

133 result_to_update = self._UpdateTryJobResult(	149 result_to_update = self._UpdateTryJobResult(

134 BuildbucketBuild.COMPLETED, master_name, builder_name, build_number,	150 BuildbucketBuild.COMPLETED, master_name, builder_name, build_number,

135 try_job_type, try_job_id, build.url, build.report)	151 try_job_type, try_job_id, build.url, build.report)

136 return result_to_update[-1]	152 return result_to_update[-1]

137 else:	153 else:

138 if allowed_response_error_times < max_error_times:	154 if allowed_response_error_times < max_error_times:

139 # Recovers from errors.	155 # Recovers from errors.

140 allowed_response_error_times = max_error_times	156 allowed_response_error_times = max_error_times

141 pipeline_wait_seconds = default_pipeline_wait_seconds	157 pipeline_wait_seconds = default_pipeline_wait_seconds

142 if build.status == BuildbucketBuild.STARTED and not already_set_started:	158 if build.status == BuildbucketBuild.STARTED and not already_set_started:

143 # It is possible this branch is skipped if a fast build goes from	159 # It is possible this branch is skipped if a fast build goes from

144 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be	160 # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be

145 # unavailable.	161 # unavailable.

146 start_time = self._MicrosecondsToDatetime(build.updated_time)	162 start_time = self._MicrosecondsToDatetime(build.updated_time)

147 self._UpdateTryJobResult(	163 self._UpdateTryJobResult(

148 BuildbucketBuild.STARTED, master_name, builder_name, build_number,	164 BuildbucketBuild.STARTED, master_name, builder_name, build_number,

149 try_job_type, try_job_id, build.url)	165 try_job_type, try_job_id, build.url)

150 already_set_started = True	166 already_set_started = True

151	167

152 if time.time() > deadline: # pragma: no cover	168 if time.time() > deadline: # pragma: no cover

153 self._UpdateTryJobMetadataForCompletedBuild(	169 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)

154 try_job_data, build, start_time, timed_out=True)

155 # Explicitly abort the whole pipeline.	170 # Explicitly abort the whole pipeline.

156 raise pipeline.Abort(	171 raise pipeline.Abort(

157 'Try job %s timed out after %d hours.' % (	172 'Try job %s timed out after %d hours.' % (

158 try_job_id, timeout_hours))	173 try_job_id, timeout_hours))

159	174

160 time.sleep(pipeline_wait_seconds) # pragma: no cover	175 time.sleep(pipeline_wait_seconds) # pragma: no cover

OLD	NEW

« no previous file with comments | « appengine/findit/model/wf_try_job_data.py ('k') | appengine/findit/waterfall/test/monitor_try_job_pipeline_test.py » ('j') | no next file with comments »