Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py

Issue 2056623002: [Findit] Adding script for custom try job queries (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Pulls historical try job metadata from Findit and prints a report."""
6
7 from collections import defaultdict
8 import datetime
9 import os
10 import sys
11
12 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
13 sys.path.insert(1, _REMOTE_API_DIR)
14
15 import remote_api
16
17 from model.wf_config import FinditConfig
18 from model.wf_try_job_data import WfTryJobData
19
20
# Sentinel reported for any metric that cannot be computed from the data.
NOT_AVAILABLE = 'N/A'
22
23
24 def _GetAverageOfNumbersInList(numbers):
25 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""
26 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE
27
28
29 def _FormatDigits(number):
30 """Formats number into a 2-digit float, or NOT_AVAILABLE."""
31 if isinstance(number, float):
32 return float('%.2f' % number)
33 return NOT_AVAILABLE
34
35
def _FormatSecondsAsHMS(seconds):
  """Converts a duration in seconds into an 'H:MM:SS' string.

  Args:
    seconds: A numeric duration in seconds, or NOT_AVAILABLE.

  Returns:
    A string of the form 'H:MM:SS', or NOT_AVAILABLE when seconds is the
    NOT_AVAILABLE sentinel.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  total_minutes, remaining_seconds = divmod(seconds, 60)
  total_hours, remaining_minutes = divmod(total_minutes, 60)
  return '%d:%02d:%02d' % (total_hours, remaining_minutes, remaining_seconds)
44
45
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns aggregate try job metrics.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format (any value may be NOT_AVAILABLE when the
    underlying data is missing):
    {
      'try_jobs_per_day': The average number of jobs requested over the time
        period specified,
      'average_regression_range_size': The average number of revisions in the
        regression range when the original failure was detected,
      'average_execution_time': The average amount of time spent on each try
        job not including in-queue time,
      'average_time_in_queue': The average amount of time a try job spends
        in-queue before it is picked up,
      'average_commits_analyzed': The average number of revisions each try job
        needed to run before coming to a conclusion,
      'longest_execution_time': The length of time of the slowest try job,
      'shortest_execution_time': The length of time of the fastest try job,
      'number_of_try_jobs': The number of try jobs in this list,
      'detection_rate': The number of try jobs that found any culprits at all
        regardless of correctness over the total number of try jobs,
      'error_rate': The number of try jobs that had an error / the total
        number of try jobs in the list,
      'time_per_revision': The average amount of execution time spent on each
        revision,
      'under_five_minutes_rate': The number of try jobs that finished
        end-to-end in under 5 minutes / total try jobs,
      'under_fifteen_minutes_rate': The number of try jobs that finished in
        5 to 15 minutes / total try jobs,
      'under_thirty_minutes_rate': The number of try jobs that finished in
        15 to 30 minutes / total try jobs,
      'over_thirty_minutes_rate': The number of try jobs that took 30 minutes
        or longer / total try jobs,
    }
  """
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    # Guard against a zero-day date range, which would otherwise raise
    # ZeroDivisionError.
    days_in_range = float((end_date - start_date).days)
    try_jobs_per_day = (
        len(try_job_data_list) / days_in_range if days_in_range
        else NOT_AVAILABLE)
    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (start to end, excluding in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_time_delta = (
            try_job_data.end_time - try_job_data.start_time)
        execution_times_seconds.append(execution_time_delta.total_seconds())

      # In-queue time (request to start).
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_time_delta = (
            try_job_data.start_time - try_job_data.request_time)
        in_queue_times.append(in_queue_time_delta.total_seconds())

      # Total time end-to-end (request to end).
      if try_job_data.request_time and try_job_data.end_time:
        # Bug fix: measure from request_time rather than start_time so the
        # buckets reflect true end-to-end time, and so a missing start_time
        # cannot crash this branch (the guard only checks request_time).
        total_time_delta = try_job_data.end_time - try_job_data.request_time
        total_time_seconds = total_time_delta.total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection and error counts.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for an empty
    # list, so no extra emptiness checks are needed around these calls.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    time_per_revision = (
        average_execution_time / average_commits_analyzed
        if (average_execution_time != NOT_AVAILABLE and
            average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate),
  }
212
213
214 def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
215 """Takes a list of WfTryJobData entities and prints their stats."""
216 spaces = ''
217 for _ in range(indent):
218 spaces += ' '
219
220 report_info = _GetReportInformation(try_job_data_list, start_date, end_date)
221 for key, value in report_info.iteritems():
222 print '%s%s: %s' % (spaces, key, value)
223
224
225 def PrettyPrint(grouped_data, start_date, end_date, indent=0):
226 if not grouped_data:
227 return
228 if isinstance(grouped_data, list):
229 # Print the stats about the list.
230 PrintCommonStats(grouped_data, start_date, end_date, indent)
231 elif isinstance(grouped_data, dict):
232 spaces = ''
233 for _ in range(indent):
234 spaces += ' '
235
236 for field, data in grouped_data.iteritems():
237 print spaces + field
238 PrettyPrint(data, start_date, end_date, indent + 2)
239 else:
240 raise Exception('grouped_data dict should only contain dicts or lists.')
241
242
243 def _SplitListByTryJobType(try_job_data_list):
244 categorized_data_dict = {
245 'compile': [],
246 'test': []
247 }
248 for try_job_data in try_job_data_list:
249 if try_job_data.try_job_type.lower() == 'compile':
250 categorized_data_dict['compile'].append(try_job_data)
251 elif try_job_data.try_job_type.lower() == 'test':
252 categorized_data_dict['test'].append(try_job_data)
253
254 return categorized_data_dict
255
256
257 def _SplitListByMaster(try_job_data_list):
258 categorized_data_dict = defaultdict(list)
259
260 for try_job_data in try_job_data_list:
261 master_name = try_job_data.master_name
262
263 if not master_name:
264 continue
265
266 categorized_data_dict[master_name].append(try_job_data)
267
268 return categorized_data_dict
269
270
271 def _SplitListByBuilder(try_job_data_list):
272 categorized_data_dict = defaultdict(list)
273
274 for try_job_data in try_job_data_list:
275 builder_name = try_job_data.builder_name
276
277 if not builder_name:
278 continue
279
280 categorized_data_dict[builder_name].append(try_job_data)
281
282 return categorized_data_dict
283
284
285 def _SplitListByHeuristicResults(try_job_data_list):
286 categorized_data_dict = {
287 'With heuristic guidance': [],
288 'Without heuristic guidance': []
289 }
290 for try_job_data in try_job_data_list:
291 if try_job_data.has_heuristic_results:
292 categorized_data_dict['With heuristic guidance'].append(try_job_data)
293 else:
294 categorized_data_dict['Without heuristic guidance'].append(try_job_data)
295 return categorized_data_dict
296
297
298 def _SplitListByCompileTargets(try_job_data_list):
299 categorized_data_dict = {
300 'With compile targets': [],
301 'Without compile targets': []
302 }
303 for try_job_data in try_job_data_list:
304 if try_job_data.has_compile_targets:
305 categorized_data_dict['With compile targets'].append(try_job_data)
306 else:
307 categorized_data_dict['Without compile targets'].append(try_job_data)
308 return categorized_data_dict
309
310
def SplitListByArg(try_job_data_list, arg):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    arg: An argument with which to split the data by. Options include:
      --type: To categorize by try job type ('compile', 'test').
      --per-master: To categorize by WfTryJobData.master_name.
      --per-builder: To categorize by WfTryJobData.builder_name.
      --has-heuristic-results: To separate by whether or not heuristic
        results were passed to the try job.
      --has-compile-targets: To separate by whether or not compile targets
        were passed to the try job. Note this field is not meaningful for
        test try jobs.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data, or the original list if arg
    is unsupported.
  """
  if arg == '--per-builder':
    # Since builders are tied to masters, group by master as well first,
    # then split each master's list by builder.
    by_master = _SplitListByMaster(try_job_data_list)
    for master in by_master.keys():
      by_master[master] = _SplitListByBuilder(by_master[master])
    return by_master

  # Simple one-level splits dispatch directly to their helper.
  splitters = {
      '--type': _SplitListByTryJobType,
      '--per-master': _SplitListByMaster,
      '--has-heuristic-results': _SplitListByHeuristicResults,
      '--has-compile-targets': _SplitListByCompileTargets,
  }
  splitter = splitters.get(arg)
  if splitter:
    return splitter(try_job_data_list)

  # TODO(lijeffrey): Add support for splitting by platform.

  # Unsupported flag, bail out without modification.
  return try_job_data_list
351
352
def SplitStructByArg(try_job_data_struct, arg):
  """Recursively applies SplitListByArg to every list in a nested structure.

  Args:
    try_job_data_struct: Either a list of WfTryJobData entities or a dict
      whose values are lists or dicts of the same shape.
    arg: The command-line flag describing how to split the data.

  Returns:
    The structure with every leaf list replaced by its split version.

  Raises:
    Exception: If the structure contains something other than lists or dicts.
  """
  if isinstance(try_job_data_struct, list):
    return SplitListByArg(try_job_data_struct, arg)

  if isinstance(try_job_data_struct, dict):
    for key in try_job_data_struct.keys():
      try_job_data_struct[key] = SplitStructByArg(
          try_job_data_struct[key], arg)
    return try_job_data_struct

  raise Exception('try job data dict must only contain lists or dicts.')
363
364
def GetArgs():
  """Returns the command-line flags with redundant ones removed."""
  flags = sys.argv[1:]

  # Categorizing by builder already groups per master regardless, so drop
  # the redundant --per-master flag when both are present.
  if '--per-master' in flags and '--per-builder' in flags:
    flags.remove('--per-master')

  return flags
375
376
if __name__ == '__main__':
  # Set up the Remote API so queries run against the live App Engine app.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Date range of try job requests to analyze: inclusive start, exclusive end.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 8)

  categorized_data = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE).fetch()

  # Successively split the flat list into nested groups, one level per flag.
  for split_arg in GetArgs():
    categorized_data = SplitStructByArg(categorized_data, split_arg)

  PrettyPrint(categorized_data, START_DATE, END_DATE)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698