appengine/findit/waterfall/process_base_swarming_task_result_pipeline.py - Issue 2526963002: [Findit] Implement retry within swarming_util.py when making server calls

Side by Side Diff: appengine/findit/waterfall/process_base_swarming_task_result_pipeline.py

Issue 2526963002: [Findit] Implement retry within swarming_util.py when making server calls (Closed)

Patch Set: Addressing comments Rebase Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from collections import defaultdict	5 from collections import defaultdict

6 import datetime	6 import datetime

7 import logging	7 import logging

8 import time	8 import time

9	9

10 from common.http_client_appengine import HttpClientAppengine as HttpClient	10 from common.http_client_appengine import HttpClientAppengine as HttpClient

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
80 task_started = False	80 task_started = False

81 task_completed = False	81 task_completed = False

82 step_name_no_platform = None	82 step_name_no_platform = None

83 task = self._GetSwarmingTask(*call_args)	83 task = self._GetSwarmingTask(*call_args)

84	84

85 while not task_completed:	85 while not task_completed:

86 data, error = swarming_util.GetSwarmingTaskResultById(	86 data, error = swarming_util.GetSwarmingTaskResultById(

87 task_id, self.HTTP_CLIENT)	87 task_id, self.HTTP_CLIENT)

88	88

89 if error:	89 if error:

90 # An error occurred when trying to contact the swarming server.	90 # An error occurred at some point when trying to retrieve data from

91 task.status = analysis_status.ERROR	91 # the swarming server, even if eventually successful.

92 task.error = error	92 task.error = error

93 task.put()	93 task.put()

94 break	94

	95 if not data:

	96 # Even after retry, no data was received.

	97 task.status = analysis_status.ERROR

	98 break

95	99

96 task_state = data['state']	100 task_state = data['state']

97 exit_code = (data.get('exit_code') if	101 exit_code = (data.get('exit_code') if

98 task_state == swarming_util.STATE_COMPLETED else None)	102 task_state == swarming_util.STATE_COMPLETED else None)

99 step_name_no_platform = (	103 step_name_no_platform = (

100 step_name_no_platform or swarming_util.GetTagValue(	104 step_name_no_platform or swarming_util.GetTagValue(

101 data.get('tags', {}), 'ref_name'))	105 data.get('tags', {}), 'ref_name'))

102	106

103 if task_state not in swarming_util.STATES_RUNNING:	107 if task_state not in swarming_util.STATES_RUNNING:

104 task_completed = True	108 task_completed = True

105	109

106 if (task_state == swarming_util.STATE_COMPLETED and	110 if (task_state == swarming_util.STATE_COMPLETED and

107 int(exit_code) != swarming_util.TASK_FAILED):	111 int(exit_code) != swarming_util.TASK_FAILED):

108 outputs_ref = data.get('outputs_ref')	112 outputs_ref = data.get('outputs_ref')

109	113

110 # If swarming task aborted because of errors in request arguments,	114 # If swarming task aborted because of errors in request arguments,

111 # it's possible that there is no outputs_ref.	115 # it's possible that there is no outputs_ref.

112 if not outputs_ref:	116 if not outputs_ref:

113 task.status = analysis_status.ERROR	117 task.status = analysis_status.ERROR

114 task.error = {	118 task.error = {

115 'code': swarming_util.NO_TASK_OUTPUTS,	119 'code': swarming_util.NO_TASK_OUTPUTS,

116 'message': 'outputs_ref is None'	120 'message': 'outputs_ref is None'

117 }	121 }

118 task.put()	122 task.put()

119 break	123 break

120	124

121 output_json, error = swarming_util.GetSwarmingTaskFailureLog(	125 output_json, error = swarming_util.GetSwarmingTaskFailureLog(

122 outputs_ref, self.HTTP_CLIENT)	126 outputs_ref, self.HTTP_CLIENT)

123	127

	128 task.status = analysis_status.COMPLETED

	129

124 if error:	130 if error:

125 task.status = analysis_status.ERROR

126 task.error = error	131 task.error = error

127 else:	132

128 task.status = analysis_status.COMPLETED	133 if not output_json:

	134 # Retry was ultimately unsuccessful.

	135 task.status = analysis_status.ERROR

129	136

130 tests_statuses = self._CheckTestsRunStatuses(output_json, *call_args)	137 tests_statuses = self._CheckTestsRunStatuses(output_json, *call_args)

131 task.tests_statuses = tests_statuses	138 task.tests_statuses = tests_statuses

132 task.put()	139 task.put()

133 else:	140 else:

134 if exit_code is not None:	141 if exit_code is not None:

135 # Swarming task completed, but the task failed.	142 # Swarming task completed, but the task failed.

136 code = int(exit_code)	143 code = int(exit_code)

137 message = swarming_util.EXIT_CODE_DESCRIPTIONS[code]	144 message = swarming_util.EXIT_CODE_DESCRIPTIONS[code]

138 else:	145 else:

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
203 task_id (str): The task id to query the swarming server on the progress	210 task_id (str): The task id to query the swarming server on the progress

204 of a swarming task.	211 of a swarming task.

205	212

206 Returns:	213 Returns:

207 A dict of lists for reliable/flaky tests.	214 A dict of lists for reliable/flaky tests.

208 """	215 """

209 call_args = self._GetArgs(master_name, builder_name, build_number,	216 call_args = self._GetArgs(master_name, builder_name, build_number,

210 step_name, *args)	217 step_name, *args)

211 step_name_no_platform = self._MonitorSwarmingTask(task_id, *call_args)	218 step_name_no_platform = self._MonitorSwarmingTask(task_id, *call_args)

212 return step_name, step_name_no_platform	219 return step_name, step_name_no_platform

OLD	NEW