Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py

Issue 2056623002: [Findit] Adding script for custom try job queries (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Fixing gpylint issues Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Pulls historical try job metadata from Findit and prints a report."""
6
7 import argparse
8 from collections import defaultdict
9 import datetime
10 import os
11 import sys
12
13 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
14 sys.path.insert(1, _REMOTE_API_DIR)
15
16 import remote_api
17
18 from model.wf_config import FinditConfig
19 from model.wf_try_job_data import WfTryJobData
20
21
22 NOT_AVAILABLE = 'N/A'
23
24
25 def _GetAverageOfNumbersInList(numbers):
26 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""
27 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE
28
29
30 def _FormatDigits(number):
31 """Formats number into a 2-digit float, or NOT_AVAILABLE."""
32 if isinstance(number, float):
33 return float('%.2f' % number)
34 return NOT_AVAILABLE
35
36
def _FormatSecondsAsHMS(seconds):
  """Renders a seconds count as an 'H:MM:SS' string.

  Args:
    seconds: A number of seconds, or NOT_AVAILABLE.

  Returns:
    'H:MM:SS' for numeric input; NOT_AVAILABLE is passed through unchanged.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  # Split off whole hours first, then minutes and leftover seconds.
  hours, remainder = divmod(seconds, 3600)
  minutes, secs = divmod(remainder, 60)
  return '%d:%02d:%02d' % (hours, minutes, secs)
45
46
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
        'try_jobs_per_day': The average number of jobs requested over the time
            period specified,
        'average_regression_range_size': The average number of revisions in the
            regression range when the original failure was detected,
        'average_execution_time': The average amount of time spent on each try
            job not including in-queue time.
        'average_time_in_queue': The average amount of time a try job spends
            in-queue before it is picked up.
        'average_commits_analyzed': The average number of revisions each try job
            needed to run before coming to a conclusion,
        'longest_execution_time': The length of time of the slowest try job,
        'shortest_execution_time': The length of time of the fastest try job,
        'number_of_try_jobs': The number of try jobs in this list,
        'detection_rate': The number of try jobs that found any culprits at all
            regardless of correctness over the total number of try jobs.
        'error_rate': The number of try jobs that had an error / the total
            number of try jobs in the list.
        'time_per_revision': The average amount of execution time spent on each
            revision.
        'under_five_minutes_rate': The number of try jobs that finished under 5
            minutes / total try jobs.
        'under_fifteen_minutes_rate': The number of try jobs that finished in
            under 15 minutes / total try jobs.
        'under_thirty_minutes_rate': The number of try jobs that finished in
            under 30 minutes / total try jobs.
        'over_thirty_minutes_rate': The number of try jobs that finished in over
            30 minutes / total try jobs.
    }
  """
  # Default every metric to NOT_AVAILABLE; each is recomputed below when the
  # necessary data is present.
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    try_jobs_per_day = (
        len(try_job_data_list) / float((end_date - start_date).days))
    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (start of run to completion, excludes in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_times_seconds.append(
            (try_job_data.end_time - try_job_data.start_time).total_seconds())

      # In-queue time (request to start of run).
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_times.append(
            (try_job_data.start_time -
             try_job_data.request_time).total_seconds())

      # Total time end-to-end (request to completion).
      if try_job_data.request_time and try_job_data.end_time:
        # Bug fix: this previously subtracted start_time, which measured the
        # wrong interval and crashed when start_time was None despite the
        # guard above only checking request_time and end_time.
        total_time_seconds = (
            try_job_data.end_time - try_job_data.request_time).total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection rate.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for an empty
    # list, so no extra emptiness checks are needed around these calls.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    time_per_revision = (
        average_execution_time / average_commits_analyzed
        if (average_execution_time != NOT_AVAILABLE and
            average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
213
214
215 def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
216 """Takes a list of WfTryJobData entities and prints their stats."""
217 spaces = ''
218 for _ in range(indent):
219 spaces += ' '
220
221 report_info = _GetReportInformation(try_job_data_list, start_date, end_date)
222 for key, value in report_info.iteritems():
223 print '%s%s: %s' % (spaces, key, value)
224
225
226 def PrettyPrint(grouped_data, start_date, end_date, indent=0):
227 if not grouped_data:
228 return
229 if isinstance(grouped_data, list):
230 # Print the stats about the list.
231 PrintCommonStats(grouped_data, start_date, end_date, indent)
232 elif isinstance(grouped_data, dict):
233 spaces = ''
234 for _ in range(indent):
235 spaces += ' '
236
237 for field, data in grouped_data.iteritems():
238 print spaces + field
239 PrettyPrint(data, start_date, end_date, indent + 2)
240 else:
241 raise Exception('grouped_data dict should only contain dicts or lists.')
242
243
244 def _SplitListByTryJobType(try_job_data_list):
245 categorized_data_dict = {
246 'compile': [],
247 'test': []
248 }
249 for try_job_data in try_job_data_list:
250 if try_job_data.try_job_type.lower() == 'compile':
251 categorized_data_dict['compile'].append(try_job_data)
252 elif try_job_data.try_job_type.lower() == 'test':
253 categorized_data_dict['test'].append(try_job_data)
254
255 return categorized_data_dict
256
257
258 def _SplitListByMaster(try_job_data_list):
259 categorized_data_dict = defaultdict(list)
260
261 for try_job_data in try_job_data_list:
262 master_name = try_job_data.master_name
263
264 if not master_name:
265 continue
266
267 categorized_data_dict[master_name].append(try_job_data)
268
269 return categorized_data_dict
270
271
272 def _SplitListByBuilder(try_job_data_list):
273 categorized_data_dict = defaultdict(list)
274
275 for try_job_data in try_job_data_list:
276 builder_name = try_job_data.builder_name
277
278 if not builder_name:
279 continue
280
281 categorized_data_dict[builder_name].append(try_job_data)
282
283 return categorized_data_dict
284
285
286 def _SplitListByHeuristicResults(try_job_data_list):
287 categorized_data_dict = {
288 'with heuristic guidance': [],
289 'without heuristic guidance': []
290 }
291 for try_job_data in try_job_data_list:
292 if try_job_data.has_heuristic_results:
293 categorized_data_dict['with heuristic guidance'].append(try_job_data)
294 else:
295 categorized_data_dict['without heuristic guidance'].append(try_job_data)
296 return categorized_data_dict
297
298
299 def _SplitListByCompileTargets(try_job_data_list):
300 categorized_data_dict = {
301 'with compile targets': [],
302 'without compile targets': []
303 }
304 for try_job_data in try_job_data_list:
305 if try_job_data.has_compile_targets:
306 categorized_data_dict['with compile targets'].append(try_job_data)
307 else:
308 categorized_data_dict['without compile targets'].append(try_job_data)
309 return categorized_data_dict
310
311
def SplitListByOption(try_job_data_list, option):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    option: An option with which to split the data by.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data.
  """
  # TODO(lijeffrey): Add support for splitting by platform.
  if option == 't':  # Try job type.
    return _SplitListByTryJobType(try_job_data_list)
  if option == 'm':  # Main waterfall master.
    return _SplitListByMaster(try_job_data_list)
  if option == 'b':  # Main waterfall builder.
    return _SplitListByBuilder(try_job_data_list)
  if option == 'r':  # Whether or not heuristic results are included.
    return _SplitListByHeuristicResults(try_job_data_list)
  if option == 'c':  # Whether or not compile targets are included.
    return _SplitListByCompileTargets(try_job_data_list)

  # Unsupported flag, bail out without modification.
  return try_job_data_list
338
339
def SplitStructByOption(try_job_data_struct, option):
  """Recursively splits every list in the structure by the given option.

  Args:
    try_job_data_struct: A list of WfTryJobData entities, or an arbitrarily
        nested dict whose leaves are such lists.
    option: An option with which to split the data by.

  Returns:
    The structure with each leaf list replaced by its split form.

  Raises:
    Exception: If the structure contains anything other than lists or dicts.
  """
  if isinstance(try_job_data_struct, dict):
    for key in try_job_data_struct:
      try_job_data_struct[key] = SplitStructByOption(
          try_job_data_struct[key], option)
    return try_job_data_struct
  if isinstance(try_job_data_struct, list):
    return SplitListByOption(try_job_data_struct, option)
  raise Exception('try job data dict must only contain lists or dicts.')
350
351
def GetArgsInOrder():
  """Parses the grouping flags and returns them in command-line order.

  Returns:
    A list of flag letters (without the leading dash), ordered exactly as
    they appeared on the command line.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('-t', action='store_true',
                      help='group try job data by type (compile, test)')
  parser.add_argument('-m', action='store_true',
                      help='group try job data by master')
  parser.add_argument('-b', action='store_true',
                      help='group try job data by builder')
  parser.add_argument('-r', action='store_true',
                      help=('group try job data by those with and without '
                            'heuristic results'))
  parser.add_argument('-c', action='store_true',
                      help=('group try job data by those with and without '
                            'compile targets'))

  parsed_flags = vars(parser.parse_args())

  # argparse does not preserve the order flags were given in, so walk the raw
  # command line and keep each flag that parsed as set.
  return [raw_arg[1:] for raw_arg in sys.argv[1:] if parsed_flags[raw_arg[1:]]]
380
381
if __name__ == '__main__':
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Hard-coded reporting window; only try jobs requested in this range are
  # pulled. Adjust these dates to change the report period.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 17)

  # Fetch all try job data entities whose request time falls in the window.
  try_job_data_query = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE)
  categorized_data = try_job_data_query.fetch()

  # Each grouping flag splits the data one level deeper, applied in the order
  # the flags were passed on the command line.
  args = GetArgsInOrder()
  for arg in args:
    categorized_data = SplitStructByOption(categorized_data, arg)

  # TODO(lijeffrey): Display data in an html page instead of printing.
  PrettyPrint(categorized_data, START_DATE, END_DATE)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698