Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import defaultdict | 5 from collections import defaultdict |
| 6 import datetime | 6 import datetime |
| 7 import logging | 7 import logging |
| 8 import time | 8 import time |
| 9 | 9 |
| 10 from common.http_client_appengine import HttpClientAppengine as HttpClient | 10 from common.http_client_appengine import HttpClientAppengine as HttpClient |
| 11 from common.pipeline_wrapper import BasePipeline | 11 from common.pipeline_wrapper import BasePipeline |
| 12 from model import analysis_status | 12 from model import analysis_status |
| 13 from model.wf_swarming_task import WfSwarmingTask | 13 from model.wf_swarming_task import WfSwarmingTask |
| 14 from waterfall import swarming_util | 14 from waterfall import swarming_util |
| 15 from waterfall import waterfall_config | 15 from waterfall import waterfall_config |
| 16 | 16 |
| 17 | 17 |
| 18 def _CheckTestsRunStatuses(output_json): | 18 class ProcessBaseSwarmingTaskResultPipeline(BasePipeline): |
| 19 """Checks result status for each test run and saves the numbers accordingly. | |
| 20 | |
| 21 Args: | |
| 22 output_json (dict): A dict of all test results in the swarming task. | |
| 23 | |
| 24 Returns: | |
| 25 tests_statuses (dict): A dict of different statuses for each test. | |
| 26 | |
| 27 Currently for each test, we are saving number of total runs, | |
| 28 number of succeeded runs and number of failed runs. | |
| 29 """ | |
| 30 tests_statuses = defaultdict(lambda: defaultdict(int)) | |
| 31 if output_json: | |
| 32 for iteration in output_json.get('per_iteration_data'): | |
| 33 for test_name, tests in iteration.iteritems(): | |
| 34 tests_statuses[test_name]['total_run'] += len(tests) | |
| 35 for test in tests: | |
| 36 tests_statuses[test_name][test['status']] += 1 | |
| 37 | |
| 38 return tests_statuses | |
| 39 | |
| 40 | |
| 41 def _ConvertDateTime(time_string): | |
| 42 """Convert UTC time string to datetime.datetime.""" | |
| 43 # Match the time convertion with swarming.py. | |
| 44 # According to swarming.py, | |
| 45 # when microseconds are 0, the '.123456' suffix is elided. | |
| 46 if not time_string: | |
| 47 return None | |
| 48 for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'): | |
| 49 try: | |
| 50 return datetime.datetime.strptime(time_string, fmt) | |
| 51 except ValueError: | |
| 52 pass | |
| 53 raise ValueError('Failed to parse %s' % time_string) # pragma: no cover | |
| 54 | |
| 55 | |
| 56 class ProcessSwarmingTaskResultPipeline(BasePipeline): | |
| 57 """A pipeline for monitoring swarming task and processing task result. | 19 """A pipeline for monitoring swarming task and processing task result. |
| 58 | 20 |
| 59 This pipeline waits for result for a swarming task and processes the result to | 21 This pipeline waits for result for a swarming task and processes the result to |
| 60 generate a dict for statuses for each test run. | 22 generate a dict for statuses for each test run. |
| 61 """ | 23 """ |
| 62 | 24 |
| 63 HTTP_CLIENT = HttpClient() | 25 HTTP_CLIENT = HttpClient() |
| 26 | |
| 27 def _CheckTestsRunStatuses(self, output_json): | |
| 28 # Checks result status for each test run and saves the numbers accordingly. | |
| 29 # Should be overridden by subclass. | |
| 30 raise NotImplementedError | |
|
lijeffrey
2016/07/27 22:38:23
nit: message for where this should be implemented
caiw
2016/07/27 23:51:33
Done.
| |
| 31 | |
| 32 def _ConvertDateTime(self, time_string): | |
| 33 """Convert UTC time string to datetime.datetime.""" | |
| 34 # Match the time convertion with swarming.py. | |
| 35 # According to swarming.py, | |
|
lijeffrey
2016/07/27 22:38:22
nit: convertion -> conversion?
Also no need for s
caiw
2016/07/27 23:51:33
Haha I actually didn't write this but changed.
| |
| 36 # when microseconds are 0, the '.123456' suffix is elided. | |
| 37 if not time_string: | |
| 38 return None | |
| 39 for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'): | |
| 40 try: | |
| 41 return datetime.datetime.strptime(time_string, fmt) | |
| 42 except ValueError: | |
| 43 pass | |
| 44 raise ValueError('Failed to parse %s' % time_string) # pragma: no cover | |
| 45 | |
| 46 def _GetSwarmingTask(self): | |
| 47 # Get the appropriate kind of Swarming Task (Wf or Flake). | |
| 48 # Should be overwritten by subclass. | |
| 49 raise NotImplementedError('Must implement _GetSwarmingTask') | |
|
lijeffrey
2016/07/27 22:38:23
nit: "_GetSwarmingTask should be implemented in th
caiw
2016/07/27 23:51:33
Done.
| |
| 50 | |
| 51 def _GetArgs(self): | |
| 52 # Return list of arguments to call _CheckTestsRunStatuses with - output_json | |
| 53 # Should be overwritten by subclass. | |
| 54 raise NotImplementedError('Must implement _GetArgs') | |
| 55 | |
| 64 # Arguments number differs from overridden method - pylint: disable=W0221 | 56 # Arguments number differs from overridden method - pylint: disable=W0221 |
| 65 def run(self, master_name, builder_name, build_number, step_name, task_id): | 57 def run(self, master_name, builder_name, build_number, |
| 58 step_name, task_id, *args): | |
| 66 """ | 59 """ |
| 67 Args: | 60 Args: |
| 68 master_name (str): The master name. | 61 master_name (str): The master name. |
| 69 builder_name (str): The builder name. | 62 builder_name (str): The builder name. |
| 70 build_number (str): The build number. | 63 build_number (str): The build number. |
| 71 step_name (str): The failed test step name. | 64 step_name (str): The failed test step name. |
| 72 task_id (str): Id for the swarming task which is triggered by Findit. | 65 task_id (str): Id for the swarming task which is triggered by Findit. |
| 73 | 66 |
| 74 Returns: | 67 Returns: |
| 75 A dict of lists for reliable/flaky tests. | 68 A dict of lists for reliable/flaky tests. |
| 76 """ | 69 """ |
| 77 | 70 call_args = self._GetArgs(master_name, builder_name, build_number, |
| 71 step_name, *args) | |
| 78 assert task_id | 72 assert task_id |
| 79 timeout_hours = waterfall_config.GetSwarmingSettings().get( | 73 timeout_hours = waterfall_config.GetSwarmingSettings().get( |
| 80 'task_timeout_hours') | 74 'task_timeout_hours') |
| 81 deadline = time.time() + timeout_hours * 60 * 60 | 75 deadline = time.time() + timeout_hours * 60 * 60 |
| 82 server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get( | 76 server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get( |
| 83 'server_query_interval_seconds') | 77 'server_query_interval_seconds') |
| 84 | |
| 85 task_started = False | 78 task_started = False |
| 86 task_completed = False | 79 task_completed = False |
| 87 tests_statuses = {} | 80 tests_statuses = {} |
| 88 step_name_no_platform = None | 81 step_name_no_platform = None |
| 89 | 82 |
| 90 while not task_completed: | 83 while not task_completed: |
| 91 # Keeps monitoring the swarming task, waits for it to complete. | 84 # Keeps monitoring the swarming task, waits for it to complete. |
| 92 data = swarming_util.GetSwarmingTaskResultById( | 85 data = swarming_util.GetSwarmingTaskResultById( |
| 93 task_id, self.HTTP_CLIENT) | 86 task_id, self.HTTP_CLIENT) |
| 94 task_state = data['state'] | 87 task_state = data['state'] |
| 95 step_name_no_platform = swarming_util.GetTagValue( | 88 step_name_no_platform = swarming_util.GetTagValue( |
| 96 data.get('tags', {}), 'ref_name') | 89 data.get('tags', {}), 'ref_name') |
| 97 if task_state not in swarming_util.STATES_RUNNING: | 90 if task_state not in swarming_util.STATES_RUNNING: |
| 98 task_completed = True | 91 task_completed = True |
| 99 task = WfSwarmingTask.Get( | 92 task = self._GetSwarmingTask(*call_args) |
| 100 master_name, builder_name, build_number, step_name) | |
| 101 if task_state == swarming_util.STATE_COMPLETED: | 93 if task_state == swarming_util.STATE_COMPLETED: |
| 102 outputs_ref = data.get('outputs_ref') | 94 outputs_ref = data.get('outputs_ref') |
| 103 output_json = swarming_util.GetSwarmingTaskFailureLog( | 95 output_json = swarming_util.GetSwarmingTaskFailureLog( |
| 104 outputs_ref, self.HTTP_CLIENT) | 96 outputs_ref, self.HTTP_CLIENT) |
| 105 tests_statuses = _CheckTestsRunStatuses(output_json) | 97 tests_statuses = self._CheckTestsRunStatuses( |
| 106 | 98 output_json, *call_args) |
| 107 task.status = analysis_status.COMPLETED | 99 task.status = analysis_status.COMPLETED |
| 108 task.tests_statuses = tests_statuses | 100 task.tests_statuses = tests_statuses |
| 109 else: | 101 else: |
| 110 task.status = analysis_status.ERROR | 102 task.status = analysis_status.ERROR |
| 111 logging.error('Swarming task stopped with status: %s' % ( | 103 logging.error('Swarming task stopped with status: %s' % ( |
| 112 task_state)) | 104 task_state)) |
| 113 priority_str = swarming_util.GetTagValue( | 105 priority_str = swarming_util.GetTagValue( |
| 114 data.get('tags', {}), 'priority') | 106 data.get('tags', {}), 'priority') |
| 115 if priority_str: | 107 if priority_str: |
| 116 task.parameters['priority'] = int(priority_str) | 108 task.parameters['priority'] = int(priority_str) |
| 117 task.put() | 109 task.put() |
| 118 else: # pragma: no cover | 110 else: # pragma: no cover |
| 119 if task_state == 'RUNNING' and not task_started: | 111 if task_state == 'RUNNING' and not task_started: |
| 120 # swarming task just starts, update status. | 112 # swarming task just starts, update status. |
| 121 task_started = True | 113 task_started = True |
| 122 task = WfSwarmingTask.Get( | 114 task = self._GetSwarmingTask(*call_args) |
| 123 master_name, builder_name, build_number, step_name) | |
| 124 task.status = analysis_status.RUNNING | 115 task.status = analysis_status.RUNNING |
| 125 task.put() | 116 task.put() |
| 126 | |
| 127 time.sleep(server_query_interval_seconds) | 117 time.sleep(server_query_interval_seconds) |
| 128 | |
| 129 if time.time() > deadline: | 118 if time.time() > deadline: |
| 130 # Updates status as ERROR. | 119 # Updates status as ERROR. |
| 131 task = WfSwarmingTask.Get( | 120 task = self._GetSwarmingTask(*call_args) |
| 132 master_name, builder_name, build_number, step_name) | |
| 133 task.status = analysis_status.ERROR | 121 task.status = analysis_status.ERROR |
| 134 task.put() | 122 task.put() |
| 135 logging.error('Swarming task timed out after %d hours.' % timeout_hours) | 123 logging.error('Swarming task timed out after %d hours.' % timeout_hours) |
| 136 break # Stops the loop and return. | 124 break # Stops the loop and return. |
| 137 | |
| 138 # Update swarming task metadate. | 125 # Update swarming task metadate. |
| 139 task = WfSwarmingTask.Get( | 126 task = self._GetSwarmingTask(*call_args) |
| 140 master_name, builder_name, build_number, step_name) | 127 task.created_time = self._ConvertDateTime(data.get('created_ts')) |
| 141 task.created_time = _ConvertDateTime(data.get('created_ts')) | 128 task.started_time = self._ConvertDateTime(data.get('started_ts')) |
| 142 task.started_time = _ConvertDateTime(data.get('started_ts')) | 129 task.completed_time = self._ConvertDateTime(data.get('completed_ts')) |
| 143 task.completed_time = _ConvertDateTime(data.get('completed_ts')) | |
| 144 task.put() | 130 task.put() |
| 145 | 131 |
| 146 return step_name, (step_name_no_platform, task.classified_tests) | 132 return step_name, step_name_no_platform |
| OLD | NEW |