Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py

Issue 2056623002: [Findit] Adding script for custom try job queries (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Pulls historical try job metadata from Findit and prints a report."""
6
7 from collections import defaultdict
8 import datetime
9 import os
10 import sys
11
12 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
13 sys.path.insert(1, _REMOTE_API_DIR)
14
15 import remote_api
16
17 from model.wf_config import FinditConfig
18 from model.wf_try_job_data import WfTryJobData
19
20
# Sentinel reported for any metric that cannot be computed from the data.
NOT_AVAILABLE = 'N/A'
22
23
24 def _GetAverageOfNumbersInList(numbers):
25 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""
26 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE
27
28
29 def _FormatDigits(number):
30 """Formats number into a 2-digit float, or NOT_AVAILABLE."""
31 if isinstance(number, float):
32 return float('%.2f' % number)
33 return NOT_AVAILABLE
34
35
def _FormatSecondsAsHMS(seconds):
  """Converts a duration in seconds into an 'H:MM:SS' string.

  Args:
    seconds: A numeric duration in seconds, or NOT_AVAILABLE.

  Returns:
    A string of the form 'H:MM:SS', or NOT_AVAILABLE when seconds is the
    NOT_AVAILABLE sentinel.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  total_minutes, remaining_seconds = divmod(seconds, 60)
  total_hours, remaining_minutes = divmod(total_minutes, 60)
  return '%d:%02d:%02d' % (total_hours, remaining_minutes, remaining_seconds)
44
45
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns aggregate try job metrics.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format (any value may be NOT_AVAILABLE when the
    underlying data is missing):
    {
      'try_jobs_per_day': The average number of jobs requested over the time
        period specified,
      'average_regression_range_size': The average number of revisions in the
        regression range when the original failure was detected,
      'average_execution_time': The average amount of time spent on each try
        job not including in-queue time,
      'average_time_in_queue': The average amount of time a try job spends
        in-queue before it is picked up,
      'average_commits_analyzed': The average number of revisions each try job
        needed to run before coming to a conclusion,
      'longest_execution_time': The length of time of the slowest try job,
      'shortest_execution_time': The length of time of the fastest try job,
      'number_of_try_jobs': The number of try jobs in this list,
      'detection_rate': The number of try jobs that found any culprits at all
        regardless of correctness over the total number of try jobs,
      'error_rate': The number of try jobs that had an error / the total
        number of try jobs in the list,
      'time_per_revision': The average amount of execution time spent on each
        revision,
      'under_five_minutes_rate': The number of try jobs that finished
        end-to-end in under 5 minutes / total try jobs,
      'under_fifteen_minutes_rate': The number of try jobs that finished in
        5 to 15 minutes / total try jobs,
      'under_thirty_minutes_rate': The number of try jobs that finished in
        15 to 30 minutes / total try jobs,
      'over_thirty_minutes_rate': The number of try jobs that took 30 minutes
        or longer / total try jobs,
    }
  """
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    # Guard against a zero-day date range, which would otherwise raise
    # ZeroDivisionError.
    days_in_range = float((end_date - start_date).days)
    try_jobs_per_day = (
        len(try_job_data_list) / days_in_range if days_in_range
        else NOT_AVAILABLE)
    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (start to end, excluding in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_time_delta = (
            try_job_data.end_time - try_job_data.start_time)
        execution_times_seconds.append(execution_time_delta.total_seconds())

      # In-queue time (request to start).
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_time_delta = (
            try_job_data.start_time - try_job_data.request_time)
        in_queue_times.append(in_queue_time_delta.total_seconds())

      # Total time end-to-end (request to end).
      if try_job_data.request_time and try_job_data.end_time:
        # Bug fix: measure from request_time rather than start_time so the
        # buckets reflect true end-to-end time, and so a missing start_time
        # cannot crash this branch (the guard only checks request_time).
        total_time_delta = try_job_data.end_time - try_job_data.request_time
        total_time_seconds = total_time_delta.total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection and error counts.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for an empty
    # list, so no extra emptiness checks are needed around these calls.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    time_per_revision = (
        average_execution_time / average_commits_analyzed
        if (average_execution_time != NOT_AVAILABLE and
            average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate),
  }
212
213
214 def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
215 """Takes a list of WfTryJobData entities and prints their stats."""
216 spaces = ''
217 for _ in range(indent):
218 spaces += ' '
219
220 report_info = _GetReportInformation(try_job_data_list, start_date, end_date)
221 for key, value in report_info.iteritems():
222 print '%s%s: %s' % (spaces, key, value)
223
224
225 def PrettyPrint(grouped_data, start_date, end_date, indent=0):
226 if not grouped_data:
227 return
228 if isinstance(grouped_data, list):
229 # Print the stats about the list.
230 PrintCommonStats(grouped_data, start_date, end_date, indent)
231 elif isinstance(grouped_data, dict):
232 spaces = ''
233 for _ in range(indent):
234 spaces += ' '
235
236 for field, data in grouped_data.iteritems():
237 print spaces + field
238 PrettyPrint(data, start_date, end_date, indent + 2)
239 else:
240 raise Exception('grouped_data dict should only contain dicts or lists.')
241
242
243 def _SplitListByTryJobType(try_job_data_list):
244 categorized_data_dict = {
245 'compile': [],
246 'test': []
247 }
248 for try_job_data in try_job_data_list:
249 if try_job_data.try_job_type.lower() == 'compile':
250 categorized_data_dict['compile'].append(try_job_data)
251 elif try_job_data.try_job_type.lower() == 'test':
252 categorized_data_dict['test'].append(try_job_data)
253
254 return categorized_data_dict
255
256
257 def _SplitListByMaster(try_job_data_list):
258 categorized_data_dict = defaultdict(list)
259
260 for try_job_data in try_job_data_list:
261 master_name = try_job_data.master_name
262
263 if not master_name:
264 continue
265
266 categorized_data_dict[master_name].append(try_job_data)
267
268 return categorized_data_dict
269
270
271 def _SplitListByBuilder(try_job_data_list):
272 categorized_data_dict = defaultdict(list)
273
274 for try_job_data in try_job_data_list:
275 builder_name = try_job_data.builder_name
276
277 if not builder_name:
278 continue
279
280 categorized_data_dict[builder_name].append(try_job_data)
281
282 return categorized_data_dict
283
284
285 def _SplitListByHeuristicResults(try_job_data_list):
286 categorized_data_dict = {
287 'With heuristic guidance': [],
288 'Without heuristic guidance': []
289 }
290 for try_job_data in try_job_data_list:
291 if try_job_data.has_heuristic_results:
292 categorized_data_dict['With heuristic guidance'].append(try_job_data)
293 else:
294 categorized_data_dict['Without heuristic guidance'].append(try_job_data)
295 return categorized_data_dict
296
297
298 def _SplitListByCompileTargets(try_job_data_list):
299 categorized_data_dict = {
300 'With compile targets': [],
301 'Without compile targets': []
302 }
303 for try_job_data in try_job_data_list:
304 if try_job_data.has_compile_targets:
305 categorized_data_dict['With compile targets'].append(try_job_data)
306 else:
307 categorized_data_dict['Without compile targets'].append(try_job_data)
308 return categorized_data_dict
309
310
def SplitListByArg(try_job_data_list, arg):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    arg: An argument with which to split the data by. Options include:
      --type: To categorize by try job type ('compile', 'test').
      --per-master: To categorize by WfTryJobData.master_name.
      --per-builder: To categorize by WfTryJobData.builder_name.
      --has-heuristic-results: To separate by whether or not heuristic
        results were passed to the try job.
      --has-compile-targets: To separate by whether or not compile targets
        were passed to the try job. Note this field is not meaningful for
        test try jobs.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data, or the original list if arg
    is unsupported.
  """
  if arg == '--per-builder':
    # Since builders are tied to masters, group by master as well first,
    # then split each master's list by builder.
    by_master = _SplitListByMaster(try_job_data_list)
    for master in by_master.keys():
      by_master[master] = _SplitListByBuilder(by_master[master])
    return by_master

  # Simple one-level splits dispatch directly to their helper.
  splitters = {
      '--type': _SplitListByTryJobType,
      '--per-master': _SplitListByMaster,
      '--has-heuristic-results': _SplitListByHeuristicResults,
      '--has-compile-targets': _SplitListByCompileTargets,
  }
  splitter = splitters.get(arg)
  if splitter:
    return splitter(try_job_data_list)

  # TODO(lijeffrey): Add support for splitting by platform.

  # Unsupported flag, bail out without modification.
  return try_job_data_list
351
352
def SplitStructByArg(try_job_data_struct, arg):
  """Recursively applies SplitListByArg to every list in a nested structure.

  Args:
    try_job_data_struct: Either a list of WfTryJobData entities or a dict
      whose values are lists or dicts of the same shape.
    arg: The command-line flag describing how to split the data.

  Returns:
    The structure with every leaf list replaced by its split version.

  Raises:
    Exception: If the structure contains something other than lists or dicts.
  """
  if isinstance(try_job_data_struct, list):
    return SplitListByArg(try_job_data_struct, arg)

  if isinstance(try_job_data_struct, dict):
    for key in try_job_data_struct.keys():
      try_job_data_struct[key] = SplitStructByArg(
          try_job_data_struct[key], arg)
    return try_job_data_struct

  raise Exception('try job data dict must only contain lists or dicts.')
363
364
def GetArgs():
  """Returns the command-line flags with redundant ones removed."""
  flags = sys.argv[1:]

  # Categorizing by builder already groups per master regardless, so drop
  # the redundant --per-master flag when both are present.
  if '--per-master' in flags and '--per-builder' in flags:
    flags.remove('--per-master')

  return flags
375
376
if __name__ == '__main__':
  # Set up the Remote API so queries run against the live App Engine app.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Date range of try job requests to analyze: inclusive start, exclusive end.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 8)

  categorized_data = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE).fetch()

  # Successively split the flat list into nested groups, one level per flag.
  for split_arg in GetArgs():
    categorized_data = SplitStructByArg(categorized_data, split_arg)

  PrettyPrint(categorized_data, START_DATE, END_DATE)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698