| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import defaultdict | 5 from collections import defaultdict |
| 6 import datetime | 6 import datetime |
| 7 import logging | 7 import logging |
| 8 import time | 8 import time |
| 9 | 9 |
| 10 from common.http_client_appengine import HttpClientAppengine as HttpClient | 10 from common.http_client_appengine import HttpClientAppengine as HttpClient |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 64 for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'): | 64 for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'): |
| 65 # When microseconds are 0, the '.123456' suffix is elided. | 65 # When microseconds are 0, the '.123456' suffix is elided. |
| 66 try: | 66 try: |
| 67 return datetime.datetime.strptime(time_string, fmt) | 67 return datetime.datetime.strptime(time_string, fmt) |
| 68 except ValueError: | 68 except ValueError: |
| 69 pass | 69 pass |
| 70 raise ValueError('Failed to parse %s' % time_string) # pragma: no cover | 70 raise ValueError('Failed to parse %s' % time_string) # pragma: no cover |
| 71 | 71 |
| 72 def _MonitorSwarmingTask(self, task_id, *call_args): | 72 def _MonitorSwarmingTask(self, task_id, *call_args): |
| 73 """Monitors the swarming task and waits for it to complete.""" | 73 """Monitors the swarming task and waits for it to complete.""" |
| 74 assert task_id |
| 74 timeout_hours = waterfall_config.GetSwarmingSettings().get( | 75 timeout_hours = waterfall_config.GetSwarmingSettings().get( |
| 75 'task_timeout_hours') | 76 'task_timeout_hours') |
| 76 deadline = time.time() + timeout_hours * 60 * 60 | 77 deadline = time.time() + timeout_hours * 60 * 60 |
| 77 server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get( | 78 server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get( |
| 78 'server_query_interval_seconds') | 79 'server_query_interval_seconds') |
| 79 | |
| 80 task_started = False | 80 task_started = False |
| 81 task_completed = False | 81 task_completed = False |
| 82 tests_statuses = {} | 82 tests_statuses = {} |
| 83 step_name_no_platform = None | 83 step_name_no_platform = None |
| 84 task = self._GetSwarmingTask(*call_args) | 84 task = self._GetSwarmingTask(*call_args) |
| 85 | 85 |
| 86 while not task_completed: | 86 while not task_completed: |
| 87 data = swarming_util.GetSwarmingTaskResultById(task_id, self.HTTP_CLIENT) | 87 data, error = swarming_util.GetSwarmingTaskResultById( |
| 88 task_id, self.HTTP_CLIENT) |
| 89 |
| 90 if error: |
| 91 # An error occurred when trying to contact the swarming server. |
| 92 task.status = analysis_status.ERROR |
| 93 task.error = error |
| 94 task.put() |
| 95 break |
| 96 |
| 88 task_state = data['state'] | 97 task_state = data['state'] |
| 89 exit_code = (data.get('exit_code') if | 98 exit_code = (data.get('exit_code') if |
| 90 task_state == swarming_util.STATE_COMPLETED else None) | 99 task_state == swarming_util.STATE_COMPLETED else None) |
| 91 step_name_no_platform = ( | 100 step_name_no_platform = ( |
| 92 step_name_no_platform or swarming_util.GetTagValue( | 101 step_name_no_platform or swarming_util.GetTagValue( |
| 93 data.get('tags', {}), 'ref_name')) | 102 data.get('tags', {}), 'ref_name')) |
| 94 | 103 |
| 95 if task_state not in swarming_util.STATES_RUNNING: | 104 if task_state not in swarming_util.STATES_RUNNING: |
| 96 task_completed = True | 105 task_completed = True |
| 97 | 106 |
| 98 if (task_state == swarming_util.STATE_COMPLETED and | 107 if (task_state == swarming_util.STATE_COMPLETED and |
| 99 int(exit_code) != swarming_util.TASK_FAILED): | 108 int(exit_code) != swarming_util.TASK_FAILED): |
| 100 outputs_ref = data.get('outputs_ref') | 109 outputs_ref = data.get('outputs_ref') |
| 101 output_json = swarming_util.GetSwarmingTaskFailureLog( | 110 output_json, error = swarming_util.GetSwarmingTaskFailureLog( |
| 102 outputs_ref, self.HTTP_CLIENT) | 111 outputs_ref, self.HTTP_CLIENT) |
| 112 |
| 113 if error: |
| 114 task.status = analysis_status.ERROR |
| 115 task.error = error |
| 116 else: |
| 117 task.status = analysis_status.COMPLETED |
| 118 |
| 103 tests_statuses = self._CheckTestsRunStatuses(output_json, *call_args) | 119 tests_statuses = self._CheckTestsRunStatuses(output_json, *call_args) |
| 104 task.status = analysis_status.COMPLETED | |
| 105 task.tests_statuses = tests_statuses | 120 task.tests_statuses = tests_statuses |
| 121 task.put() |
| 106 else: | 122 else: |
| 123 if exit_code is not None: |
| 124 # Swarming task completed, but the task failed. |
| 125 code = int(exit_code) |
| 126 message = swarming_util.EXIT_CODE_DESCRIPTIONS[code] |
| 127 else: |
| 128 # The swarming task did not complete. |
| 129 code = swarming_util.STATES_NOT_RUNNING_TO_ERROR_CODES[task_state] |
| 130 message = task_state |
| 131 |
| 107 task.status = analysis_status.ERROR | 132 task.status = analysis_status.ERROR |
| 133 task.error = { |
| 134 'code': code, |
| 135 'message': message |
| 136 } |
| 137 task.put() |
| 138 |
| 108 logging_str = 'Swarming task stopped with status: %s' % task_state | 139 logging_str = 'Swarming task stopped with status: %s' % task_state |
| 109 if exit_code: # pragma: no cover | 140 if exit_code: # pragma: no cover |
| 110 logging_str += ' and exit_code: %s - %s' % ( | 141 logging_str += ' and exit_code: %s - %s' % ( |
| 111 exit_code, swarming_util.EXIT_CODE_DESCRIPTIONS[int(exit_code)]) | 142 exit_code, swarming_util.EXIT_CODE_DESCRIPTIONS[code]) |
| 112 logging.error(logging_str) | 143 logging.error(logging_str) |
| 113 | 144 |
| 114 tags = data.get('tags', {}) | 145 tags = data.get('tags', {}) |
| 115 priority_str = swarming_util.GetTagValue(tags, 'priority') | 146 priority_str = swarming_util.GetTagValue(tags, 'priority') |
| 116 if priority_str: | 147 if priority_str: |
| 117 task.parameters['priority'] = int(priority_str) | 148 task.parameters['priority'] = int(priority_str) |
| 118 | 149 |
| 119 task.put() | 150 task.put() |
| 120 else: # pragma: no cover | 151 else: # pragma: no cover |
| 121 if task_state == 'RUNNING' and not task_started: | 152 if task_state == 'RUNNING' and not task_started: |
| 122 # swarming task just starts, update status. | 153 # swarming task just starts, update status. |
| 123 task_started = True | 154 task_started = True |
| 124 task.status = analysis_status.RUNNING | 155 task.status = analysis_status.RUNNING |
| 125 task.put() | 156 task.put() |
| 126 time.sleep(server_query_interval_seconds) | 157 time.sleep(server_query_interval_seconds) |
| 127 | 158 |
| 128 # Timeout. | 159 # Timeout. |
| 129 if time.time() > deadline: | 160 if time.time() > deadline: |
| 130 # Updates status as ERROR. | 161 # Updates status as ERROR. |
| 131 task.status = analysis_status.ERROR | 162 task.status = analysis_status.ERROR |
| 163 task.error = { |
| 164 'code': swarming_util.TIMED_OUT, |
| 165 'message': 'Process swarming task result timed out' |
| 166 } |
| 132 task.put() | 167 task.put() |
| 133 logging.error('Swarming task timed out after %d hours.' % timeout_hours) | 168 logging.error('Swarming task timed out after %d hours.' % timeout_hours) |
| 134 break # Stops the loop and return. | 169 break # Stops the loop and return. |
| 135 | 170 |
| 136 # Update swarming task metadata timestamps. | 171 # Update swarming task metadata. |
| 137 task.created_time = self._ConvertDateTime(data.get('created_ts')) | 172 task.created_time = (task.created_time or |
| 138 task.started_time = self._ConvertDateTime(data.get('started_ts')) | 173 self._ConvertDateTime(data.get('created_ts'))) |
| 139 task.completed_time = self._ConvertDateTime(data.get('completed_ts')) | 174 task.started_time = (task.started_time or |
| 175 self._ConvertDateTime(data.get('started_ts'))) |
| 176 task.completed_time = (task.completed_time or |
| 177 self._ConvertDateTime(data.get('completed_ts'))) |
| 140 task.put() | 178 task.put() |
| 141 | 179 |
| 142 return step_name_no_platform | 180 return step_name_no_platform |
| 143 | 181 |
| 144 # Arguments number differs from overridden method - pylint: disable=W0221 | 182 # Arguments number differs from overridden method - pylint: disable=W0221 |
| 145 def run(self, master_name, builder_name, build_number, step_name, task_id, | 183 def run(self, master_name, builder_name, build_number, step_name, task_id, |
| 146 *args): | 184 *args): |
| 147 """Monitors a swarming task. | 185 """Monitors a swarming task. |
| 148 | 186 |
| 149 Args: | 187 Args: |
| 150 master_name (str): The master name. | 188 master_name (str): The master name. |
| 151 builder_name (str): The builder name. | 189 builder_name (str): The builder name. |
| 152 build_number (str): The build number. | 190 build_number (str): The build number. |
| 153 step_name (str): The failed test step name. | 191 step_name (str): The failed test step name. |
| 154 task_id (str): The task id to query the swarming server on the progresss | 192 task_id (str): The task id to query the swarming server on the progresss |
| 155 of a swarming task. | 193 of a swarming task. |
| 156 | 194 |
| 157 Returns: | 195 Returns: |
| 158 A dict of lists for reliable/flaky tests. | 196 A dict of lists for reliable/flaky tests. |
| 159 """ | 197 """ |
| 160 call_args = self._GetArgs(master_name, builder_name, build_number, | 198 call_args = self._GetArgs(master_name, builder_name, build_number, |
| 161 step_name, *args) | 199 step_name, *args) |
| 162 step_name_no_platform = self._MonitorSwarmingTask(task_id, *call_args) | 200 step_name_no_platform = self._MonitorSwarmingTask(task_id, *call_args) |
| 163 return step_name, step_name_no_platform | 201 return step_name, step_name_no_platform |
| 164 | |
| OLD | NEW |