Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(127)

Side by Side Diff: appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py

Issue 1772173002: [Findit] Adding swarming task metadata query page (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Cleanup Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Pulls historical swarming task metadata from Findit and prints a report."""
6
7 from collections import defaultdict
8 from collections import OrderedDict
9 import datetime
10 import os
11 import sys
12
13 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
14 sys.path.insert(1, _REMOTE_API_DIR)
15
16 import remote_api
17
18 from model import wf_analysis_status
19 from model.wf_swarming_task import WfSwarmingTask
20
21
22 NOT_AVAILABLE = 'N/A'
23
24
25 # TODO(lijeffrey): Refactor helper methods into module sharable with
26 # try_job_data_query.py.
27 def _GetAverageOfNumbersInList(numbers):
28 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""
29 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE
30
31
32 def _FormatDigits(number):
33 """Formats number into a 2-digit float, or NOT_AVAILABLE."""
34 if isinstance(number, (int, float)):
35 return float('%.2f' % number)
36 return NOT_AVAILABLE
37
38
def _FormatSecondsAsHMS(seconds):
  """Renders a duration given in seconds as 'H:MM:SS'.

  Args:
    seconds: The number of seconds to format, or NOT_AVAILABLE.

  Returns:
    A string in the form hours:minutes:seconds, with minutes and seconds
    zero-padded to two digits, or NOT_AVAILABLE if seconds is NOT_AVAILABLE.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  total_minutes, remaining_seconds = divmod(seconds, 60)
  hours, remaining_minutes = divmod(total_minutes, 60)
  return '%d:%02d:%02d' % (hours, remaining_minutes, remaining_seconds)
47
48
49 def _FormatStepName(step_name):
50 # Formats step_name to return only the first word (the step name itself).
51 # Step names are expected to be in either the format 'step_name' or
52 # 'step_name on platform'.
53 return step_name.strip().split(' ')[0]
54
55
56 def _CategorizeSwarmingTaskData(swarming_task_list):
57 """Categorizes swarming_task_list into a dict.
58
59 Args:
60 swarming_task_list: A list of WfSwarmingTask objects.
61
62 Returns:
63 A dict in the format:
64 {
65 priority1: {
66 master_name1': {
67 'builder_name1': {
68 'step_name1': [WfSwarmingTask1, WfSwarmingTask2, ...],
69 ...
70 },
71 ...
72 },
73 ...
74 },
75 ...
76 }
77 """
78 categorized_data = defaultdict(
79 lambda: defaultdict(
80 lambda: defaultdict(
81 lambda: defaultdict(list))))
82
83 for swarming_task in swarming_task_list:
84 if (not swarming_task.parameters or not swarming_task.tests_statuses or
85 swarming_task.status != wf_analysis_status.ANALYZED):
86 # Disregard any swarming tasks that are not yet completed or were
87 # triggered before 'parameters' and 'tests_statuses' were introduced.
88 continue
89
90 priority = swarming_task.parameters['priority']
91 master_name = swarming_task.master_name
92 builder_name = swarming_task.builder_name
93 step_name = swarming_task.key.id()
94
95 categorized_data[priority][master_name][builder_name][step_name].append(
96 swarming_task)
97
98 return categorized_data
99
100
def _GetReportInformation(swarming_task_list, start_date, end_date):
  """Computes and returns swarming task metadata in a dict.

  Args:
    swarming_task_list: A list of WfSwarmingTask entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format. Every value falls back to NOT_AVAILABLE
    when it cannot be computed from swarming_task_list:
    {
        'swarming_tasks_per_day': The average number of swarming tasks
            requested per day over the time period specified.
        'average_execution_time': The average number of seconds spent on
            each swarming task, not including in-queue time.
        'average_time_in_queue': The average number of seconds a swarming
            task spends in-queue before it is picked up.
        'longest_execution_time': The execution time of the slowest swarming
            task, as a string formatted by datetime.timedelta.
        'shortest_execution_time': The execution time of the fastest
            swarming task, as a string formatted by datetime.timedelta.
        'tests_times_iterations': The average number of tests run multiplied
            by the average number of iterations.
        'average_number_of_iterations': The average number of iterations
            each test for this step was run.
        'average_number_of_tests_run': The average number of tests run per
            swarming task.
        'error_rate': The fraction of tasks in swarming_task_list that ended
            in error, as a float.
    }
  """
  report = {
      'swarming_tasks_per_day': NOT_AVAILABLE,
      'average_execution_time': NOT_AVAILABLE,
      'average_time_in_queue': NOT_AVAILABLE,
      'longest_execution_time': NOT_AVAILABLE,
      'shortest_execution_time': NOT_AVAILABLE,
      'tests_times_iterations': NOT_AVAILABLE,
      'average_number_of_iterations': NOT_AVAILABLE,
      'average_number_of_tests_run': NOT_AVAILABLE,
      'error_rate': NOT_AVAILABLE,
  }

  if not swarming_task_list:
    return report

  task_count = len(swarming_task_list)

  # A window shorter than one full day has .days == 0; report N/A instead of
  # dividing by zero.
  days_in_period = (end_date - start_date).days
  if days_in_period > 0:
    report['swarming_tasks_per_day'] = task_count / float(days_in_period)

  execution_times_seconds = []
  in_queue_times = []
  iteration_counts = []
  tests_counts = []
  error_count = 0

  for swarming_task in swarming_task_list:
    started_time = swarming_task.started_time

    # Execution time.
    if started_time and swarming_task.completed_time:
      execution_times_seconds.append(
          _GetElapsedSeconds(started_time, swarming_task.completed_time))

    # In-queue time.
    if started_time and swarming_task.created_time:
      in_queue_times.append(
          _GetElapsedSeconds(swarming_task.created_time, started_time))

    # Number of iterations. Tolerate tasks that have no parameters at all.
    iterations_to_rerun = (swarming_task.parameters or {}).get(
        'iterations_to_rerun')
    if iterations_to_rerun is not None:
      iteration_counts.append(iterations_to_rerun)

    # Number of tests. Tolerate tasks that have no tests_statuses at all.
    number_of_tests = len(swarming_task.tests_statuses or ())
    if number_of_tests:
      tests_counts.append(number_of_tests)

    # Error rate.
    # NOTE(review): if the caller only passes tasks whose status is ANALYZED,
    # this count stays at 0 -- confirm that is intended.
    if swarming_task.status == wf_analysis_status.ERROR:
      error_count += 1

  # _GetAverageOfNumbersInList already yields NOT_AVAILABLE for empty lists.
  report['average_execution_time'] = _GetAverageOfNumbersInList(
      execution_times_seconds)
  report['average_time_in_queue'] = _GetAverageOfNumbersInList(in_queue_times)

  if execution_times_seconds:
    report['longest_execution_time'] = str(
        datetime.timedelta(seconds=max(execution_times_seconds)))
    report['shortest_execution_time'] = str(
        datetime.timedelta(seconds=min(execution_times_seconds)))

  average_number_of_iterations = _GetAverageOfNumbersInList(iteration_counts)
  average_number_of_tests_run = _GetAverageOfNumbersInList(tests_counts)
  report['average_number_of_iterations'] = average_number_of_iterations
  report['average_number_of_tests_run'] = average_number_of_tests_run

  # Only multiply real numbers; multiplying the 'N/A' placeholder by a float
  # would raise a TypeError.
  if (average_number_of_iterations != NOT_AVAILABLE and
      average_number_of_tests_run != NOT_AVAILABLE):
    report['tests_times_iterations'] = (
        average_number_of_iterations * average_number_of_tests_run)

  # Force float division so the rate is a fraction on Python 2 as well.
  report['error_rate'] = float(error_count) / task_count

  return report


def _GetElapsedSeconds(start_time, end_time):
  """Returns the whole seconds from start_time to end_time, days included."""
  # timedelta.seconds excludes whole days, so total_seconds() is used instead.
  return int((end_time - start_time).total_seconds())
202
203
204 def _GetReport(categorized_swarming_task_dict, start_date, end_date):
205 """Returns a swarming task data report as an ordered dict sorted by priority.
206
207 Args:
208 categorized_swarming_task_dict: A dict categorizing WFSwarmingTask entities
209 organized by priority, master_name, builder_name, step_name. This dict
210 should be the output from _CategorizeSwarmingTaskData().
211 start_date: The earliest request date for which data should be computed.
212 end_date: The latest request date for which data should be computed.
213
214 Returns:
215 An ordered dict by highest priority (lower priority number) swarming tasks
216 in the format:
217 {
218 priority: {
219 'master_name': {
220 'builder_name': {
221 'step_name': {
222 'swarming_tasks_per_day': number or 'N/A',
223 'average_execution_time': number or 'N/A',
224 'average_time_in_queue': number or 'N/A',
225 'longest_execution_time': number or 'N/A',
226 'shortest_execution_time': number or 'N/A',
227 'tests_times_iterations': number or 'N/A'
228 'average_number_of_tests_run': number or 'N/A',
229 'average_number_of_iterations': number or 'N/A',
230 'error_rate': number or 'N/A'
231 },
232 ...
233 },
234 ...
235 },
236 ...
237 },
238 ...
239 }
240 """
241 report = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
242
243 for priority, masters in categorized_swarming_task_dict.iteritems():
244 for master, builders in masters.iteritems():
245 for builder, steps in builders.iteritems():
246 for step, swarming_task_data_list in steps.iteritems():
247 report[priority][master][builder][step] = _GetReportInformation(
248 swarming_task_data_list, start_date, end_date)
249
250 return OrderedDict(sorted(report.items()))
251
252
def CreateHtmlPage(report, start_date, end_date):
  """Generates an html string for displaying the report.

  One table is emitted per priority, with one row per
  (master, builder, step) combination found in report.

  Args:
    report: A dict containing all the relevant information returned from
        _GetReport().
    start_date: The earliest date that a swarming task was requested.
    end_date: The latest date that a swarming task was requested.

  Returns:
    A string containing the html body for the final report page.
  """
  style = """
      <style>
        table {
          border-collapse: collapse;
          border: 1px solid gray;
        }
        table td, th {
          border: 1px solid gray;
        }
      </style>"""
  table_header = """
      <table>
        <tr>
          <th>Master</th>
          <th>Builder</th>
          <th>Step</th>
          <th>Average # Tasks Per Day</th>
          <th>Average Time In Queue</th>
          <th>Average Execution Time</th>
          <th>Longest Execution Time</th>
          <th>Shortest Execution Time</th>
          <th># Tests * # Iterations</th>
          <th>Average # Iterations</th>
          <th>Average # Tests Run</th>
          <th>Error Rate</th>
        </tr>"""
  cell_template = '<td>%s</td>'

  # Collect fragments and join once instead of repeatedly extending a string.
  html_parts = [
      style,
      '<b>Swarming task metadata from %s to %s (%s days)</b>' % (
          str(start_date), str(end_date), (end_date - start_date).days),
      '<h1>Aggregate metadata for swarming tasks by priority</h1>',
  ]

  for priority, masters in report.items():
    html_parts.append('<h2>Task Priority: %s</h2>' % priority)
    html_parts.append(table_header)

    for master_name, builders in masters.items():
      for builder_name, steps in builders.items():
        for step_name, step_report in steps.items():
          # Cell order must match the column order in table_header.
          # NOTE(review): values are inserted without HTML escaping.
          cells = [
              master_name,
              builder_name,
              _FormatStepName(step_name),
              _FormatDigits(step_report['swarming_tasks_per_day']),
              _FormatSecondsAsHMS(
                  _FormatDigits(step_report['average_time_in_queue'])),
              _FormatSecondsAsHMS(
                  _FormatDigits(step_report['average_execution_time'])),
              step_report['longest_execution_time'],
              step_report['shortest_execution_time'],
              _FormatDigits(step_report['tests_times_iterations']),
              _FormatDigits(step_report['average_number_of_iterations']),
              _FormatDigits(step_report['average_number_of_tests_run']),
              _FormatDigits(step_report['error_rate']),
          ]
          # Close every data row explicitly, matching the header row.
          html_parts.append(
              '<tr>%s</tr>' % ''.join(cell_template % cell for cell in cells))

    html_parts.append('</table>')

  return ''.join(html_parts)
329
330
if __name__ == '__main__':
  # Route datastore calls to the live App Engine app through the Remote API.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Reporting window: tasks created on or after START_DATE and strictly
  # before END_DATE are included.
  START_DATE = datetime.datetime(2016, 2, 1)
  END_DATE = datetime.datetime(2016, 3, 7)

  swarming_tasks = WfSwarmingTask.query(
      WfSwarmingTask.created_time >= START_DATE,
      WfSwarmingTask.created_time < END_DATE).fetch()

  final_report = _GetReport(
      _CategorizeSwarmingTaskData(swarming_tasks), START_DATE, END_DATE)

  # Write to $TMP_DIR when it is set, otherwise to the working directory.
  output_dir = os.environ.get('TMP_DIR') or os.getcwd()
  report_path = os.path.join(
      output_dir, 'swarming_task_metadata_report.html')

  with open(report_path, 'w') as report_file:
    report_file.write(CreateHtmlPage(final_report, START_DATE, END_DATE))

  print('Swarming task metadata report available at file://%s' % report_path)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698