Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: appengine/findit/waterfall/flake/recursive_flake_pipeline.py

Issue 2376573004: [Findit] For automatic analyses of flaky tests, run the Swarming tasks off PST peak hours. (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from datetime import timedelta
6 import random
7
5 from common import appengine_util 8 from common import appengine_util
6 from common import constants 9 from common import constants
10 from common import time_util
7 from common.pipeline_wrapper import BasePipeline 11 from common.pipeline_wrapper import BasePipeline
8 12
9 from model import analysis_status 13 from model import analysis_status
10 from model.flake.flake_swarming_task import FlakeSwarmingTask 14 from model.flake.flake_swarming_task import FlakeSwarmingTask
11 from model.flake.master_flake_analysis import MasterFlakeAnalysis 15 from model.flake.master_flake_analysis import MasterFlakeAnalysis
12 from waterfall import waterfall_config 16 from waterfall import waterfall_config
13 from waterfall.process_flake_swarming_task_result_pipeline import ( 17 from waterfall.process_flake_swarming_task_result_pipeline import (
14 ProcessFlakeSwarmingTaskResultPipeline) 18 ProcessFlakeSwarmingTaskResultPipeline)
15 from waterfall.trigger_flake_swarming_task_pipeline import ( 19 from waterfall.trigger_flake_swarming_task_pipeline import (
16 TriggerFlakeSwarmingTaskPipeline) 20 TriggerFlakeSwarmingTaskPipeline)
17 21
18 22
def _GetETAToStartAnalysis(manually_triggered):
  """Returns an ETA as of a UTC datetime.datetime to start the analysis.

  Automatic analyses are deferred off PST peak hours (11am to 6pm on workdays)
  to reduce load on Swarming; manual requests start right away.

  Args:
    manually_triggered (bool): True if the analysis is from a manual request,
      like by a Chromium sheriff.

  Returns:
    The ETA as of a UTC datetime.datetime to start the analysis.
  """
  if manually_triggered:
    # Manual requests are urgent, so run right away.
    return time_util.GetUTCNow()

  now_at_mtv = time_util.GetDatetimeInTimezone(
      'US/Pacific', time_util.GetUTCNowWithTimezone())

  if now_at_mtv.weekday() >= 5:
    # PST Saturday or Sunday: no peak hours, run right away.
    return time_util.GetUTCNow()

  if now_at_mtv.hour < 11 or now_at_mtv.hour >= 18:
    # Before 11am or at/after 6pm PST: already off peak hours.
    return time_util.GetUTCNow()

  # Within peak hours: defer to 6pm PST today, plus a random latency of up to
  # 30 minutes so deferred tasks don't all hit Swarming at the same instant.
  six_pm_at_mtv = now_at_mtv.replace(hour=18, minute=0, second=0,
                                     microsecond=0)
  eta_at_mtv = six_pm_at_mtv + timedelta(seconds=random.randint(0, 30 * 60))

  # Convert back to UTC.
  return time_util.GetDatetimeInTimezone('UTC', eta_at_mtv)
58
59
class RecursiveFlakePipeline(BasePipeline):
  """Recursively reruns a flaky test to narrow down its regression range."""

  def __init__(self, *args, **kwargs):
    super(RecursiveFlakePipeline, self).__init__(*args, **kwargs)
    # Remember whether a person requested this analysis so that scheduling
    # (StartOffPSTPeakHours) can start it immediately instead of deferring.
    self.manually_triggered = kwargs.get('manually_triggered', False)

  def StartOffPSTPeakHours(self, *args, **kwargs):
    """Starts the pipeline off PST peak hours if not triggered manually."""
    kwargs['eta'] = _GetETAToStartAnalysis(self.manually_triggered)
    self.start(*args, **kwargs)

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, run_build_number, step_name,
          test_name, master_build_number, flakiness_algorithm_results_dict,
          manually_triggered=False):
    """Pipeline to determine the regression range of a flaky test.

    Args:
      master_name (str): The master name.
      builder_name (str): The builder name.
      run_build_number (int): The build number of the current swarming rerun.
      step_name (str): The step name.
      test_name (str): The test name.
      master_build_number (int): The build number of the Master_Flake_analysis.
      flakiness_algorithm_results_dict (dict): A dictionary used by
        NextBuildNumberPipeline.
      manually_triggered (bool): True if the analysis is from manual request,
        like by a Chromium sheriff.

    Returns:
      A dict of lists for reliable/flaky tests.
    """
    analysis = MasterFlakeAnalysis.Get(
        master_name, builder_name, master_build_number, step_name, test_name)
    if analysis.status != analysis_status.RUNNING:  # pragma: no branch
      # Mark the overall analysis as running on the first recursion.
      analysis.status = analysis_status.RUNNING
      analysis.put()

    # Trigger a Swarming rerun of the test at this build number...
    task_id = yield TriggerFlakeSwarmingTaskPipeline(
        master_name, builder_name, run_build_number, step_name, [test_name])
    # ...and feed the resulting task into the result-processing pipeline.
    test_result_future = yield ProcessFlakeSwarmingTaskResultPipeline(
        master_name, builder_name, run_build_number,
        step_name, task_id, master_build_number, test_name)
    # Decide (and possibly schedule) the next build number to check.
    yield NextBuildNumberPipeline(
        master_name, builder_name, master_build_number, run_build_number,
        step_name, test_name, test_result_future,
        flakiness_algorithm_results_dict, manually_triggered=manually_triggered)
59 110
60 def get_next_run(master, flakiness_algorithm_results_dict): 111 def get_next_run(master, flakiness_algorithm_results_dict):
61 # A description of this algorithm can be found at: 112 # A description of this algorithm can be found at:
62 # https://docs.google.com/document/d/1wPYFZ5OT998Yn7O8wGDOhgfcQ98mknoX13AesJaS 6ig/edit 113 # https://docs.google.com/document/d/1wPYFZ5OT998Yn7O8wGDOhgfcQ98mknoX13AesJaS 6ig/edit
63 # Get the last result. 114 # Get the last result.
64 last_result = master.success_rates[-1] 115 last_result = master.success_rates[-1]
65 cur_run = min(master.build_numbers) 116 cur_run = min(master.build_numbers)
66 flake_settings = waterfall_config.GetCheckFlakeSettings() 117 flake_settings = waterfall_config.GetCheckFlakeSettings()
67 lower_flake_threshold = flake_settings.get('lower_flake_threshold') 118 lower_flake_threshold = flake_settings.get('lower_flake_threshold')
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 return (flakiness_algorithm_results_dict['lower_boundary'] + 200 return (flakiness_algorithm_results_dict['lower_boundary'] +
150 flakiness_algorithm_results_dict['sequential_run_index']) 201 flakiness_algorithm_results_dict['sequential_run_index'])
151 202
152 203
class NextBuildNumberPipeline(BasePipeline):
  """Picks the next build number to check, or finalizes the analysis."""

  # Arguments number differs from overridden method - pylint: disable=W0221
  # Unused argument - pylint: disable=W0613
  def run(self, master_name, builder_name, master_build_number,
          run_build_number, step_name, test_name, test_result_future,
          flakiness_algorithm_results_dict, manually_triggered=False):
    """Schedules the next RecursiveFlakePipeline run or completes the analysis.

    On a swarming-task error the whole analysis is marked as ERROR and the
    recursion stops.
    """
    # Get the MasterFlakeAnalysis corresponding to the parameters.
    analysis = MasterFlakeAnalysis.Get(
        master_name, builder_name, master_build_number, step_name, test_name)
    swarming_task = FlakeSwarmingTask.Get(
        master_name, builder_name, run_build_number, step_name, test_name)

    if swarming_task.status == analysis_status.ERROR:
      # The swarming rerun failed; don't call another pipeline.
      analysis.status = analysis_status.ERROR
      analysis.put()
      return

    # Figure out which build number to check next: once the test has both
    # stabled out and flaked out, fall back to a sequential search.
    stabled_and_flaked = (flakiness_algorithm_results_dict['stabled_out'] and
                          flakiness_algorithm_results_dict['flaked_out'])
    if stabled_and_flaked:
      next_run = sequential_next_run(analysis, flakiness_algorithm_results_dict)
    else:
      next_run = get_next_run(analysis, flakiness_algorithm_results_dict)

    # A build number outside [last_build_number, master_build_number) signals
    # that there is nothing left to check.
    if (next_run < flakiness_algorithm_results_dict['last_build_number'] or
        next_run >= master_build_number):
      next_run = 0

    if not next_run:
      analysis.status = analysis_status.COMPLETED
      analysis.put()
      return

    pipeline_job = RecursiveFlakePipeline(
        master_name, builder_name, next_run, step_name, test_name,
        master_build_number,
        flakiness_algorithm_results_dict=flakiness_algorithm_results_dict,
        manually_triggered=manually_triggered)
    # pylint: disable=W0201
    pipeline_job.target = appengine_util.GetTargetNameForModule(
        constants.WATERFALL_BACKEND)
    # Schedule the recursion off PST peak hours unless manually triggered.
    pipeline_job.StartOffPSTPeakHours(
        queue_name=self.queue_name or constants.DEFAULT_QUEUE)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698