appengine/findit/waterfall/process_base_swarming_task_result_pipeline.py - Issue 2130543004: Waterfall components of regression range finder.

Side by Side Diff: appengine/findit/waterfall/process_base_swarming_task_result_pipeline.py

Issue 2130543004: Waterfall components of regression range finder. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« appengine/findit/waterfall/flake/recursive_flake_pipeline.py ('K') | « appengine/findit/waterfall/flake/recursive_flake_pipeline.py ('k') | appengine/findit/waterfall/process_flake_swarming_task_result_pipeline.py » ('j') | appengine/findit/waterfall/process_flake_swarming_task_result_pipeline.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from collections import defaultdict	5 from collections import defaultdict

6 import datetime	6 import datetime

7 import logging	7 import logging

8 import time	8 import time

9	9

10 from common.http_client_appengine import HttpClientAppengine as HttpClient	10 from common.http_client_appengine import HttpClientAppengine as HttpClient

11 from common.pipeline_wrapper import BasePipeline	11 from common.pipeline_wrapper import BasePipeline

12 from model import analysis_status	12 from model import analysis_status

13 from model.wf_swarming_task import WfSwarmingTask	13 from model.wf_swarming_task import WfSwarmingTask

	14 from model.flake.flake_swarming_task_result import FlakeSwarmingTaskResult

14 from waterfall import swarming_util	15 from waterfall import swarming_util

15 from waterfall import waterfall_config	16 from waterfall import waterfall_config

16	17

17	18

18 def _CheckTestsRunStatuses(output_json):	19 class ProcessBaseSwarmingTaskResultPipeline(BasePipeline):

19 """Checks result status for each test run and saves the numbers accordingly.

20

21 Args:

22 output_json (dict): A dict of all test results in the swarming task.

23

24 Returns:

25 tests_statuses (dict): A dict of different statuses for each test.

26

27 Currently for each test, we are saving number of total runs,

28 number of succeeded runs and number of failed runs.

29 """

30 tests_statuses = defaultdict(lambda: defaultdict(int))

31 if output_json:

32 for iteration in output_json.get('per_iteration_data'):

33 for test_name, tests in iteration.iteritems():

34 tests_statuses[test_name]['total_run'] += len(tests)

35 for test in tests:

36 tests_statuses[test_name][test['status']] += 1

37

38 return tests_statuses

39

40

41 def _ConvertDateTime(time_string):

42 """Convert UTC time string to datetime.datetime."""

43 # Match the time convertion with swarming.py.

44 # According to swarming.py,

45 # when microseconds are 0, the '.123456' suffix is elided.

46 if not time_string:

47 return None

48 for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'):

49 try:

50 return datetime.datetime.strptime(time_string, fmt)

51 except ValueError:

52 pass

53 raise ValueError('Failed to parse %s' % time_string) # pragma: no cover

54

55

56 class ProcessSwarmingTaskResultPipeline(BasePipeline):

57 """A pipeline for monitoring swarming task and processing task result.	20 """A pipeline for monitoring swarming task and processing task result.

58	21

59 This pipeline waits for result for a swarming task and processes the result to	22 This pipeline waits for result for a swarming task and processes the result to

60 generate a dict for statuses for each test run.	23 generate a dict for statuses for each test run.

61 """	24 """

	25 def _CheckTestsRunStatuses(self, output_json):

	26 """Checks result status for each test run and saves the numbers accordingly.

	27

	28 Args:

	29 output_json (dict): A dict of all test results in the swarming task.
	stgao 2016/07/09 00:04:33 style nit: indent. Args: arg_name (type): expla style nit: indent. Args: arg_name (type): explanation. You may also run pylint to check for most style violations: pylint path/to/python_file.py caiw 2016/07/14 00:59:44 Done. Show quoted text On 2016/07/09 00:04:33, stgao wrote: > style nit: indent. > > Args: > arg_name (type): explanation. > > You may also run pylint to check for most style violations: > pylint path/to/python_file.py Done.
	30

	31 Returns:

	32 tests_statuses (dict): A dict of different statuses for each test.

	33

	34 Currently for each test, we are saving number of total runs,

	35 number of succeeded runs and number of failed runs.

	36 """

	37 tests_statuses = defaultdict(lambda: defaultdict(int))

	38 if output_json:

	39 for iteration in output_json.get('per_iteration_data'):

	40 for test_name, tests in iteration.iteritems():

	41 tests_statuses[test_name]['total_run'] += len(tests)

	42 for test in tests:

	43 tests_statuses[test_name][test['status']] += 1

	44 return tests_statuses

	45

	46 def _ConvertDateTime(self, time_string):

	47 """Convert UTC time string to datetime.datetime."""

	48 # Match the time convertion with swarming.py.

	49 # According to swarming.py,

	50 # when microseconds are 0, the '.123456' suffix is elided.

	51 if not time_string:

	52 return None

	53 for fmt in ('%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S'):

	54 try:

	55 return datetime.datetime.strptime(time_string, fmt)

	56 except ValueError:

	57 pass

	58 raise ValueError('Failed to parse %s' % time_string) # pragma: no cover

	59

	60 def _GetSwarmingTask(

	61 self,master_name, builder_name, step_name, build_number):

	62 # Get the appropriate kind of Swarming Task (Wf or Flake)

	63 # Should be overwritten by subclass

	64 pass
	stgao 2016/07/09 00:04:33 How about raising an NotImplementedError here as t How about raising an NotImplementedError here as the subclass is expected to implement it? caiw 2016/07/14 00:59:44 Done. Show quoted text On 2016/07/09 00:04:33, stgao wrote: > How about raising an NotImplementedError here as the subclass is expected to > implement it? Done.
62	65

63 HTTP_CLIENT = HttpClient()	66 HTTP_CLIENT = HttpClient()

64 # Arguments number differs from overridden method - pylint: disable=W0221	67 # Arguments number differs from overridden method - pylint: disable=W0221

65 def run(self, master_name, builder_name, build_number, step_name, task_id):	68 def run(self, master_name, builder_name, build_number, step_name, task_id):

66 """	69 """

67 Args:	70 Args:

68 master_name (str): The master name.	71 master_name (str): The master name.

69 builder_name (str): The builder name.	72 builder_name (str): The builder name.

70 build_number (str): The build number.	73 build_number (str): The build number.

71 step_name (str): The failed test step name.	74 step_name (str): The failed test step name.

72 task_id (str): Id for the swarming task which is triggered by Findit.	75 task_id (str): Id for the swarming task which is triggered by Findit.

73	76

74 Returns:	77 Returns:

75 A dict of lists for reliable/flaky tests.	78 A dict of lists for reliable/flaky tests.

76 """	79 """

77

78 assert task_id	80 assert task_id

79 timeout_hours = waterfall_config.GetSwarmingSettings().get(	81 timeout_hours = waterfall_config.GetSwarmingSettings().get(

80 'task_timeout_hours')	82 'task_timeout_hours')

81 deadline = time.time() + timeout_hours * 60 * 60	83 deadline = time.time() + timeout_hours * 60 * 60

82 server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get(	84 server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get(

83 'server_query_interval_seconds')	85 'server_query_interval_seconds')

84

85 task_started = False	86 task_started = False

86 task_completed = False	87 task_completed = False

87 tests_statuses = {}	88 tests_statuses = {}

88 step_name_no_platform = None	89 step_name_no_platform = None

89	90

90 while not task_completed:	91 while not task_completed:

91 # Keeps monitoring the swarming task, waits for it to complete.	92 # Keeps monitoring the swarming task, waits for it to complete.

92 data = swarming_util.GetSwarmingTaskResultById(	93 data = swarming_util.GetSwarmingTaskResultById(

93 task_id, self.HTTP_CLIENT)	94 task_id, self.HTTP_CLIENT)

94 task_state = data['state']	95 task_state = data['state']

95 step_name_no_platform = swarming_util.GetTagValue(	96 step_name_no_platform = swarming_util.GetTagValue(

96 data.get('tags', {}), 'ref_name')	97 data.get('tags', {}), 'ref_name')

97 if task_state not in swarming_util.STATES_RUNNING:	98 if task_state not in swarming_util.STATES_RUNNING:

98 task_completed = True	99 task_completed = True

99 task = WfSwarmingTask.Get(	100 task = self._GetSwarmingTask(

100 master_name, builder_name, build_number, step_name)	101 master_name, builder_name, step_name, build_number)

101 if task_state == swarming_util.STATE_COMPLETED:	102 if task_state == swarming_util.STATE_COMPLETED:

102 outputs_ref = data.get('outputs_ref')	103 outputs_ref = data.get('outputs_ref')

103 output_json = swarming_util.GetSwarmingTaskFailureLog(	104 output_json = swarming_util.GetSwarmingTaskFailureLog(

104 outputs_ref, self.HTTP_CLIENT)	105 outputs_ref, self.HTTP_CLIENT)

105 tests_statuses = _CheckTestsRunStatuses(output_json)	106 # This following line will not be compatible!

106	107 tests_statuses = self._CheckTestsRunStatuses(

	108 output_json, master_name, builder_name,

	109 build_number, step_name)

107 task.status = analysis_status.COMPLETED	110 task.status = analysis_status.COMPLETED

108 task.tests_statuses = tests_statuses	111 task.tests_statuses = tests_statuses

109 else:	112 else:

110 task.status = analysis_status.ERROR	113 task.status = analysis_status.ERROR

111 logging.error('Swarming task stopped with status: %s' % (	114 logging.error('Swarming task stopped with status: %s' % (

112 task_state))	115 task_state))

113 priority_str = swarming_util.GetTagValue(	116 priority_str = swarming_util.GetTagValue(

114 data.get('tags', {}), 'priority')	117 data.get('tags', {}), 'priority')

115 if priority_str:	118 if priority_str:

116 task.parameters['priority'] = int(priority_str)	119 task.parameters['priority'] = int(priority_str)

117 task.put()	120 task.put()

118 else: # pragma: no cover	121 else: # pragma: no cover

119 if task_state == 'RUNNING' and not task_started:	122 if task_state == 'RUNNING' and not task_started:

120 # swarming task just starts, update status.	123 # swarming task just starts, update status.

121 task_started = True	124 task_started = True

122 task = WfSwarmingTask.Get(	125 task = self._GetSwarmingTask(

123 master_name, builder_name, build_number, step_name)	126 master_name, builder_name, step_name, build_number)

124 task.status = analysis_status.RUNNING	127 task.status = analysis_status.RUNNING

125 task.put()	128 task.put()

126	129

127 time.sleep(server_query_interval_seconds)	130 time.sleep(server_query_interval_seconds)

128	131

129 if time.time() > deadline:	132 if time.time() > deadline:

130 # Updates status as ERROR.	133 # Updates status as ERROR.

131 task = WfSwarmingTask.Get(	134 task = self._GetSwarmingTask(

132 master_name, builder_name, build_number, step_name)	135 master_name, builder_name, step_name, build_number)

133 task.status = analysis_status.ERROR	136 task.status = analysis_status.ERROR

134 task.put()	137 task.put()

135 logging.error('Swarming task timed out after %d hours.' % timeout_hours)	138 logging.error('Swarming task timed out after %d hours.' % timeout_hours)

136 break # Stops the loop and return.	139 break # Stops the loop and return.

137

138 # Update swarming task metadate.	140 # Update swarming task metadate.

139 task = WfSwarmingTask.Get(	141 task = self._GetSwarmingTask(

140 master_name, builder_name, build_number, step_name)	142 master_name, builder_name, step_name, build_number)

141 task.created_time = _ConvertDateTime(data.get('created_ts'))	143 task.created_time = self._ConvertDateTime(data.get('created_ts'))

142 task.started_time = _ConvertDateTime(data.get('started_ts'))	144 task.started_time = self._ConvertDateTime(data.get('started_ts'))

143 task.completed_time = _ConvertDateTime(data.get('completed_ts'))	145 task.completed_time = self._ConvertDateTime(data.get('completed_ts'))

144 task.put()	146 task.put()

145	147

146 return step_name, (step_name_no_platform, task.classified_tests)	148 return step_name, step_name_no_platform

OLD	NEW