Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: appengine/findit/waterfall/flake/recursive_flake_pipeline.py

Issue 2243673002: [Findit] Added algorithm to analysis (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: addressed comments, implemented algorithm to get it to one CL, made some UI changes. Unfortunately… Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import copy
5 from datetime import datetime 6 from datetime import datetime
6 7
7 from common import appengine_util 8 from common import appengine_util
8 from common import constants 9 from common import constants
9 from common.pipeline_wrapper import BasePipeline 10 from common.pipeline_wrapper import BasePipeline
10 11
11 from model import analysis_status 12 from model import analysis_status
12 from model.flake.master_flake_analysis import MasterFlakeAnalysis 13 from model.flake.master_flake_analysis import MasterFlakeAnalysis
14 from model.flake.flake_swarming_task import FlakeSwarmingTask
13 from waterfall.trigger_flake_swarming_task_pipeline import ( 15 from waterfall.trigger_flake_swarming_task_pipeline import (
14 TriggerFlakeSwarmingTaskPipeline) 16 TriggerFlakeSwarmingTaskPipeline)
15 from waterfall.process_flake_swarming_task_result_pipeline import ( 17 from waterfall.process_flake_swarming_task_result_pipeline import (
16 ProcessFlakeSwarmingTaskResultPipeline) 18 ProcessFlakeSwarmingTaskResultPipeline)
17 19
# TODO(lijeffrey): Move to config.
# A swarming rerun's success rate below LOWER or above UPPER is treated as
# "stable" (consistently failing or consistently passing); rates inside the
# (LOWER, UPPER) band are treated as "flaky".
LOWER_FLAKE_THRESHOLD = .02
UPPER_FLAKE_THRESHOLD = .98
# After more than this many consecutive flaky (resp. stable) reruns, the
# search declares the test "flaked out" (resp. "stabled out").
MAX_FLAKE_IN_A_ROW = 4
MAX_STABLE_IN_A_ROW = 4
18 26
class RecursiveFlakePipeline(BasePipeline):
  """Reruns a flaky test at one build, then recurses to the next build.

  Each run chains three child pipelines: trigger a swarming rerun, process
  its result, and let NextBuildNumberPipeline decide whether (and at which
  build) to spawn another RecursiveFlakePipeline.
  """

  # Arguments number differs from overridden method - pylint: disable=W0221
  def run(self, master_name, builder_name, run_build_number, step_name,
          test_name, master_build_number, flakiness_algorithm_results_dict,
          queue_name=constants.DEFAULT_QUEUE):
    """
    Args:
      master_name (str): The master name.
      builder_name (str): The builder name.
      run_build_number (int): The build number of the current swarming rerun.
      step_name (str): The step name.
      test_name (str): The test name.
      master_build_number (int): The build number of the MasterFlakeAnalysis
          entity this analysis is rooted at.
      flakiness_algorithm_results_dict (dict): Mutable search state shared
          with NextBuildNumberPipeline across recursive runs.
      queue_name (str): Which task queue to run on.
    """

    # Call trigger pipeline (flake style).
    task_id = yield TriggerFlakeSwarmingTaskPipeline(
        master_name, builder_name, run_build_number, step_name, [test_name])
    # Pass the trigger pipeline into a process pipeline.
    test_result_future = yield ProcessFlakeSwarmingTaskResultPipeline(
        master_name, builder_name, run_build_number,
        step_name, task_id, master_build_number, test_name)
    yield NextBuildNumberPipeline(
        master_name, builder_name, master_build_number, run_build_number,
        step_name, test_name, test_result_future, queue_name,
        flakiness_algorithm_results_dict)
59
def get_next_run(master, flakiness_algorithm_results_dict,
                 lower_flake_threshold=None, upper_flake_threshold=None,
                 max_stable_in_a_row=None, max_flake_in_a_row=None):
  """Exponential-search step: picks the next (earlier) build to rerun.

  Tracks runs of consecutive stable/flaky results in
  flakiness_algorithm_results_dict and grows the step size with the length
  of the current run, so the search backs away quickly from a region it has
  already classified. Boundaries of the flaky region are recorded in the
  dict as they are discovered.

  Args:
    master (MasterFlakeAnalysis): The analysis entity. success_rates must be
        non-empty; build_numbers holds the builds analyzed so far.
    flakiness_algorithm_results_dict (dict): Mutable search state with keys
        'stable_in_a_row', 'flakes_in_a_row', 'stabled_out', 'flaked_out',
        'lower_boundary', 'lower_boundary_result', 'upper_boundary'.
        Updated in place.
    lower_flake_threshold (float): Success rates below this are stable.
        Defaults to LOWER_FLAKE_THRESHOLD (TODO: move to config).
    upper_flake_threshold (float): Success rates above this are stable.
        Defaults to UPPER_FLAKE_THRESHOLD.
    max_stable_in_a_row (int): Run length after which the test is considered
        stabled out. Defaults to MAX_STABLE_IN_A_ROW.
    max_flake_in_a_row (int): Run length after which the test is considered
        flaked out. Defaults to MAX_FLAKE_IN_A_ROW.

  Returns:
    (int) The build number to rerun next. May fall below the analyzable
    range; the caller (NextBuildNumberPipeline) clamps such values to 0.
  """
  if lower_flake_threshold is None:
    lower_flake_threshold = LOWER_FLAKE_THRESHOLD
  if upper_flake_threshold is None:
    upper_flake_threshold = UPPER_FLAKE_THRESHOLD
  if max_stable_in_a_row is None:
    max_stable_in_a_row = MAX_STABLE_IN_A_ROW
  if max_flake_in_a_row is None:
    max_flake_in_a_row = MAX_FLAKE_IN_A_ROW

  # The most recent swarming rerun's success rate decides stable vs. flaky.
  last_result = master.success_rates[-1]
  # The earliest build analyzed so far; the search only moves backwards.
  cur_run = min(master.build_numbers)

  if (last_result < lower_flake_threshold or
      last_result > upper_flake_threshold):
    # Stable result (consistently passing or consistently failing).
    flakiness_algorithm_results_dict['stable_in_a_row'] += 1
    if (flakiness_algorithm_results_dict['stable_in_a_row'] >
        max_stable_in_a_row):
      flakiness_algorithm_results_dict['stabled_out'] = True
    if (flakiness_algorithm_results_dict['stabled_out'] and
        not flakiness_algorithm_results_dict['flaked_out']):
      # Overshot below the flaky region: record the new upper boundary and
      # discard any lower boundary found so far. None (not False) marks
      # "no boundary yet", so the sentinel can never be confused with a
      # build number (review feedback: avoid mixing int and bool).
      flakiness_algorithm_results_dict['upper_boundary'] = cur_run
      flakiness_algorithm_results_dict['lower_boundary'] = None
    elif (flakiness_algorithm_results_dict['flaked_out'] and
          not flakiness_algorithm_results_dict['stabled_out'] and
          not flakiness_algorithm_results_dict['lower_boundary']):
      # First stable build found below a known flaky region.
      flakiness_algorithm_results_dict['lower_boundary'] = cur_run
      flakiness_algorithm_results_dict['lower_boundary_result'] = 'STABLE'
    flakiness_algorithm_results_dict['flakes_in_a_row'] = 0
    # Step grows with the run length: back away faster from a stable regime.
    step_size = flakiness_algorithm_results_dict['stable_in_a_row'] + 1
  else:
    # Flaky result.
    flakiness_algorithm_results_dict['flakes_in_a_row'] += 1
    if (flakiness_algorithm_results_dict['flakes_in_a_row'] >
        max_flake_in_a_row):
      flakiness_algorithm_results_dict['flaked_out'] = True
    if (flakiness_algorithm_results_dict['flaked_out'] and
        not flakiness_algorithm_results_dict['stabled_out']):
      # Still inside the flaky region: move the upper boundary down and
      # invalidate any lower boundary.
      flakiness_algorithm_results_dict['upper_boundary'] = cur_run
      flakiness_algorithm_results_dict['lower_boundary'] = None
    elif (flakiness_algorithm_results_dict['stabled_out'] and
          not flakiness_algorithm_results_dict['flaked_out'] and
          not flakiness_algorithm_results_dict['lower_boundary']):
      # First flaky build found below a known stable region.
      flakiness_algorithm_results_dict['lower_boundary'] = cur_run
      flakiness_algorithm_results_dict['lower_boundary_result'] = 'FLAKE'
    flakiness_algorithm_results_dict['stable_in_a_row'] = 0
    step_size = flakiness_algorithm_results_dict['flakes_in_a_row'] + 1

  return cur_run - step_size
99
100
def sequential_next_run(master, flakiness_algorithm_results_dict,
                        lower_flake_threshold=None,
                        upper_flake_threshold=None):
  """Linear-search step once both boundaries of the flaky region are known.

  Walks forward one build at a time from the lower boundary until the
  result status (stable vs. flaky) flips relative to the boundary's status,
  which pinpoints the suspected culprit build.

  Args:
    master (MasterFlakeAnalysis): The analysis entity. Reads success_rates;
        when the culprit is found, sets suspected_flake_build_number and
        persists the entity with put().
    flakiness_algorithm_results_dict (dict): Mutable search state; reads
        'lower_boundary' and 'lower_boundary_result', and increments
        'sequential_run_index' in place. sequential_run_index is initialized
        in initialize_flake_pipeline; index 0 is the boundary build itself,
        so the flip comparison only applies from index 1 onward.
    lower_flake_threshold (float): Success rates below this are stable.
        Defaults to LOWER_FLAKE_THRESHOLD (TODO: move to config).
    upper_flake_threshold (float): Success rates above this are stable.
        Defaults to UPPER_FLAKE_THRESHOLD.

  Returns:
    (int) The next build number to rerun, or 0 when the suspected build has
    been identified (0 terminates the recursion in NextBuildNumberPipeline).
  """
  if lower_flake_threshold is None:
    lower_flake_threshold = LOWER_FLAKE_THRESHOLD
  if upper_flake_threshold is None:
    upper_flake_threshold = UPPER_FLAKE_THRESHOLD

  last_result = master.success_rates[-1]
  last_result_status = 'FLAKE'
  if (last_result < lower_flake_threshold or
      last_result > upper_flake_threshold):
    last_result_status = 'STABLE'

  if flakiness_algorithm_results_dict['sequential_run_index'] > 0:
    if (last_result_status !=
        flakiness_algorithm_results_dict['lower_boundary_result']):
      # The status flipped: the current build is the suspected culprit.
      master.suspected_flake_build_number = (
          flakiness_algorithm_results_dict['lower_boundary'] +
          flakiness_algorithm_results_dict['sequential_run_index'])
      master.put()
      return 0

  flakiness_algorithm_results_dict['sequential_run_index'] += 1
  return (flakiness_algorithm_results_dict['lower_boundary'] +
          flakiness_algorithm_results_dict['sequential_run_index'])
34 118
class NextBuildNumberPipeline(BasePipeline):
  """Decides whether and where to recurse after one swarming rerun."""

  # Arguments number differs from overridden method - pylint: disable=W0221
  # Unused argument - pylint: disable=W0613
  def run(self, master_name, builder_name, master_build_number,
          run_build_number, step_name, test_name, test_result_future,
          queue_name, flakiness_algorithm_results_dict):
    """
    Args:
      master_name (str): The master name.
      builder_name (str): The builder name.
      master_build_number (int): The build number of the MasterFlakeAnalysis.
      run_build_number (int): The build number of the rerun just completed.
      step_name (str): The step name.
      test_name (str): The test name.
      test_result_future: Future of the result-processing child pipeline.
      queue_name (str): Which task queue to run on.
      flakiness_algorithm_results_dict (dict): Mutable search state shared
          across recursive runs.
    """
    # Get MasterFlakeAnalysis success list corresponding to parameters.
    master = MasterFlakeAnalysis.Get(master_name, builder_name,
                                     master_build_number, step_name, test_name)
    # Don't call another pipeline if the swarming task failed.
    flake_swarming_task = FlakeSwarmingTask.Get(
        master_name, builder_name, run_build_number, step_name, test_name)
    if flake_swarming_task.status == analysis_status.ERROR:
      master.status = analysis_status.ERROR
      # Persist the status change; without put() the update is lost
      # (review feedback).
      master.put()
      return

    # Figure out which build number to analyze next, if any. Once both
    # boundaries of the flaky region are known, switch from exponential
    # search to a sequential walk to pinpoint the culprit build.
    if (flakiness_algorithm_results_dict['stabled_out'] and
        flakiness_algorithm_results_dict['flaked_out']):
      next_run = sequential_next_run(master, flakiness_algorithm_results_dict)
    else:
      next_run = get_next_run(master, flakiness_algorithm_results_dict)

    # Clamp runs that would fall outside the analyzable range; 0 means stop.
    if next_run < flakiness_algorithm_results_dict['last_build_number']:
      next_run = 0

    if next_run:
      # The dict is not mutated after this point, so it can be passed on
      # directly; no deep copy needed (review feedback).
      pipeline_job = RecursiveFlakePipeline(
          master_name, builder_name, next_run, step_name, test_name,
          master_build_number,
          flakiness_algorithm_results_dict=flakiness_algorithm_results_dict)
      # pylint: disable=W0201
      pipeline_job.target = appengine_util.GetTargetNameForModule(
          constants.WATERFALL_BACKEND)
      pipeline_job.start(queue_name=queue_name)
    else:
      master.status = analysis_status.COMPLETED
      # Persist completion; without put() the update is lost
      # (review feedback).
      master.put()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698