Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: appengine/findit/waterfall/flake/recursive_flake_pipeline.py

Issue 2376573004: [Findit] For automatic analyses of flaky tests, run the Swarming tasks off PST peak hours. (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from datetime import timedelta
6 import random
7
5 from common import appengine_util 8 from common import appengine_util
6 from common import constants 9 from common import constants
10 from common import time_util
7 from common.pipeline_wrapper import BasePipeline 11 from common.pipeline_wrapper import BasePipeline
8 12
9 from model import analysis_status 13 from model import analysis_status
10 from model.flake.flake_swarming_task import FlakeSwarmingTask 14 from model.flake.flake_swarming_task import FlakeSwarmingTask
11 from model.flake.master_flake_analysis import MasterFlakeAnalysis 15 from model.flake.master_flake_analysis import MasterFlakeAnalysis
12 from waterfall import waterfall_config 16 from waterfall import waterfall_config
13 from waterfall.process_flake_swarming_task_result_pipeline import ( 17 from waterfall.process_flake_swarming_task_result_pipeline import (
14 ProcessFlakeSwarmingTaskResultPipeline) 18 ProcessFlakeSwarmingTaskResultPipeline)
15 from waterfall.trigger_flake_swarming_task_pipeline import ( 19 from waterfall.trigger_flake_swarming_task_pipeline import (
16 TriggerFlakeSwarmingTaskPipeline) 20 TriggerFlakeSwarmingTaskPipeline)
17 21
18 22
def _GetETAToStartAnalysis(manually_triggered):
  """Returns an ETA as of a UTC datetime.datetime to start the analysis.

  Automatic analyses are deferred off PST peak hours (11am to 6pm on workdays)
  to reduce load on Swarming; manual requests start right away.

  Args:
    manually_triggered (bool): True if the analysis is from a manual request,
      like by a Chromium sheriff.

  Returns:
    The ETA as of a UTC datetime.datetime to start the analysis.
  """
  if manually_triggered:
    # Manual requests are urgent, so run right away.
    return time_util.GetUTCNow()

  now_at_mtv = time_util.GetDatetimeInTimezone(
      'US/Pacific', time_util.GetUTCNowWithTimezone())

  if now_at_mtv.weekday() >= 5:
    # PST Saturday or Sunday: no peak hours, run right away.
    return time_util.GetUTCNow()

  if now_at_mtv.hour < 11 or now_at_mtv.hour >= 18:
    # Before 11am or at/after 6pm PST: already off peak hours.
    return time_util.GetUTCNow()

  # Within peak hours: defer to 6pm PST today, plus a random latency of up to
  # 30 minutes so deferred tasks don't all hit Swarming at the same instant.
  six_pm_at_mtv = now_at_mtv.replace(hour=18, minute=0, second=0,
                                     microsecond=0)
  eta_at_mtv = six_pm_at_mtv + timedelta(seconds=random.randint(0, 30 * 60))

  # Convert back to UTC.
  return time_util.GetDatetimeInTimezone('UTC', eta_at_mtv)
58
59
class RecursiveFlakePipeline(BasePipeline):
  """Recursively reruns a flaky test to narrow down its regression range."""

  def __init__(self, *args, **kwargs):
    super(RecursiveFlakePipeline, self).__init__(*args, **kwargs)
    # Remember whether a person requested this analysis so that scheduling
    # (StartOffPSTPeakHours) can start it immediately instead of deferring.
    self.manually_triggered = kwargs.get('manually_triggered', False)

  def StartOffPSTPeakHours(self, *args, **kwargs):
    """Starts the pipeline off PST peak hours if not triggered manually."""
    kwargs['eta'] = _GetETAToStartAnalysis(self.manually_triggered)
    self.start(*args, **kwargs)

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, run_build_number, step_name,
          test_name, master_build_number, flakiness_algorithm_results_dict,
          manually_triggered=False):
    """Pipeline to determine the regression range of a flaky test.

    Args:
      master_name (str): The master name.
      builder_name (str): The builder name.
      run_build_number (int): The build number of the current swarming rerun.
      step_name (str): The step name.
      test_name (str): The test name.
      master_build_number (int): The build number of the Master_Flake_analysis.
      flakiness_algorithm_results_dict (dict): A dictionary used by
        NextBuildNumberPipeline.
      manually_triggered (bool): True if the analysis is from manual request,
        like by a Chromium sheriff.

    Returns:
      A dict of lists for reliable/flaky tests.
    """
    analysis = MasterFlakeAnalysis.Get(
        master_name, builder_name, master_build_number, step_name, test_name)
    if analysis.status != analysis_status.RUNNING:  # pragma: no branch
      # Mark the overall analysis as running on the first recursion.
      analysis.status = analysis_status.RUNNING
      analysis.put()

    # Trigger a Swarming rerun of the test at this build number...
    task_id = yield TriggerFlakeSwarmingTaskPipeline(
        master_name, builder_name, run_build_number, step_name, [test_name])
    # ...and feed the resulting task into the result-processing pipeline.
    test_result_future = yield ProcessFlakeSwarmingTaskResultPipeline(
        master_name, builder_name, run_build_number,
        step_name, task_id, master_build_number, test_name)
    # Decide (and possibly schedule) the next build number to check.
    yield NextBuildNumberPipeline(
        master_name, builder_name, master_build_number, run_build_number,
        step_name, test_name, test_result_future,
        flakiness_algorithm_results_dict, manually_triggered=manually_triggered)
59 110
60 def get_next_run(master, flakiness_algorithm_results_dict): 111 def get_next_run(master, flakiness_algorithm_results_dict):
61 # A description of this algorithm can be found at: 112 # A description of this algorithm can be found at:
62 # https://docs.google.com/document/d/1wPYFZ5OT998Yn7O8wGDOhgfcQ98mknoX13AesJaS 6ig/edit 113 # https://docs.google.com/document/d/1wPYFZ5OT998Yn7O8wGDOhgfcQ98mknoX13AesJaS 6ig/edit
63 # Get the last result. 114 # Get the last result.
64 last_result = master.success_rates[-1] 115 last_result = master.success_rates[-1]
65 cur_run = min(master.build_numbers) 116 cur_run = min(master.build_numbers)
66 flake_settings = waterfall_config.GetCheckFlakeSettings() 117 flake_settings = waterfall_config.GetCheckFlakeSettings()
67 lower_flake_threshold = flake_settings.get('lower_flake_threshold') 118 lower_flake_threshold = flake_settings.get('lower_flake_threshold')
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 return (flakiness_algorithm_results_dict['lower_boundary'] + 200 return (flakiness_algorithm_results_dict['lower_boundary'] +
150 flakiness_algorithm_results_dict['sequential_run_index']) 201 flakiness_algorithm_results_dict['sequential_run_index'])
151 202
152 203
class NextBuildNumberPipeline(BasePipeline):
  """Picks the next build number to check, or finalizes the analysis."""

  # Arguments number differs from overridden method - pylint: disable=W0221
  # Unused argument - pylint: disable=W0613
  def run(self, master_name, builder_name, master_build_number,
          run_build_number, step_name, test_name, test_result_future,
          flakiness_algorithm_results_dict, manually_triggered=False):
    """Schedules the next RecursiveFlakePipeline run or completes the analysis.

    On a swarming-task error the whole analysis is marked as ERROR and the
    recursion stops.
    """
    # Get the MasterFlakeAnalysis corresponding to the parameters.
    analysis = MasterFlakeAnalysis.Get(
        master_name, builder_name, master_build_number, step_name, test_name)
    swarming_task = FlakeSwarmingTask.Get(
        master_name, builder_name, run_build_number, step_name, test_name)

    if swarming_task.status == analysis_status.ERROR:
      # The swarming rerun failed; don't call another pipeline.
      analysis.status = analysis_status.ERROR
      analysis.put()
      return

    # Figure out which build number to check next: once the test has both
    # stabled out and flaked out, fall back to a sequential search.
    stabled_and_flaked = (flakiness_algorithm_results_dict['stabled_out'] and
                          flakiness_algorithm_results_dict['flaked_out'])
    if stabled_and_flaked:
      next_run = sequential_next_run(analysis, flakiness_algorithm_results_dict)
    else:
      next_run = get_next_run(analysis, flakiness_algorithm_results_dict)

    # A build number outside [last_build_number, master_build_number) signals
    # that there is nothing left to check.
    if (next_run < flakiness_algorithm_results_dict['last_build_number'] or
        next_run >= master_build_number):
      next_run = 0

    if not next_run:
      analysis.status = analysis_status.COMPLETED
      analysis.put()
      return

    pipeline_job = RecursiveFlakePipeline(
        master_name, builder_name, next_run, step_name, test_name,
        master_build_number,
        flakiness_algorithm_results_dict=flakiness_algorithm_results_dict,
        manually_triggered=manually_triggered)
    # pylint: disable=W0201
    pipeline_job.target = appengine_util.GetTargetNameForModule(
        constants.WATERFALL_BACKEND)
    # Schedule the recursion off PST peak hours unless manually triggered.
    pipeline_job.StartOffPSTPeakHours(
        queue_name=self.queue_name or constants.DEFAULT_QUEUE)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698