Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py

Issue 2056623002: [Findit] Adding script for custom try job queries (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Fixing gpylint issues Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Pulls historical try job metadata from Findit and prints a report."""
6
7 import argparse
8 from collections import defaultdict
9 import datetime
10 import os
11 import sys
12
13 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
14 sys.path.insert(1, _REMOTE_API_DIR)
15
16 import remote_api
17
18 from model.wf_config import FinditConfig
19 from model.wf_try_job_data import WfTryJobData
20
21
22 NOT_AVAILABLE = 'N/A'
23
24
25 def _GetAverageOfNumbersInList(numbers):
26 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""
27 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE
28
29
30 def _FormatDigits(number):
31 """Formats number into a 2-digit float, or NOT_AVAILABLE."""
32 if isinstance(number, float):
33 return float('%.2f' % number)
34 return NOT_AVAILABLE
35
36
def _FormatSecondsAsHMS(seconds):
  """Renders a seconds count as an 'H:MM:SS' string.

  Args:
    seconds: A number of seconds, or NOT_AVAILABLE.

  Returns:
    'H:MM:SS' for numeric input; NOT_AVAILABLE is passed through unchanged.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  # Split off whole hours first, then minutes and leftover seconds.
  hours, remainder = divmod(seconds, 3600)
  minutes, secs = divmod(remainder, 60)
  return '%d:%02d:%02d' % (hours, minutes, secs)
45
46
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
        'try_jobs_per_day': The average number of jobs requested over the time
            period specified,
        'average_regression_range_size': The average number of revisions in the
            regression range when the original failure was detected,
        'average_execution_time': The average amount of time spent on each try
            job not including in-queue time.
        'average_time_in_queue': The average amount of time a try job spends
            in-queue before it is picked up.
        'average_commits_analyzed': The average number of revisions each try job
            needed to run before coming to a conclusion,
        'longest_execution_time': The length of time of the slowest try job,
        'shortest_execution_time': The length of time of the fastest try job,
        'number_of_try_jobs': The number of try jobs in this list,
        'detection_rate': The number of try jobs that found any culprits at all
            regardless of correctness over the total number of try jobs.
        'error_rate': The number of try jobs that had an error / the total
            number of try jobs in the list.
        'time_per_revision': The average amount of execution time spent on each
            revision.
        'under_five_minutes_rate': The number of try jobs that finished under 5
            minutes / total try jobs.
        'under_fifteen_minutes_rate': The number of try jobs that finished in
            under 15 minutes / total try jobs.
        'under_thirty_minutes_rate': The number of try jobs that finished in
            under 30 minutes / total try jobs.
        'over_thirty_minutes_rate': The number of try jobs that finished in over
            30 minutes / total try jobs.
    }
  """
  # Default every metric to NOT_AVAILABLE; each is recomputed below when the
  # necessary data is present.
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    try_jobs_per_day = (
        len(try_job_data_list) / float((end_date - start_date).days))
    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (start of run to completion, excludes in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_times_seconds.append(
            (try_job_data.end_time - try_job_data.start_time).total_seconds())

      # In-queue time (request to start of run).
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_times.append(
            (try_job_data.start_time -
             try_job_data.request_time).total_seconds())

      # Total time end-to-end (request to completion).
      if try_job_data.request_time and try_job_data.end_time:
        # Bug fix: this previously subtracted start_time, which measured the
        # wrong interval and crashed when start_time was None despite the
        # guard above only checking request_time and end_time.
        total_time_seconds = (
            try_job_data.end_time - try_job_data.request_time).total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection rate.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for an empty
    # list, so no extra emptiness checks are needed around these calls.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    time_per_revision = (
        average_execution_time / average_commits_analyzed
        if (average_execution_time != NOT_AVAILABLE and
            average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
213
214
215 def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
216 """Takes a list of WfTryJobData entities and prints their stats."""
217 spaces = ''
218 for _ in range(indent):
219 spaces += ' '
220
221 report_info = _GetReportInformation(try_job_data_list, start_date, end_date)
222 for key, value in report_info.iteritems():
223 print '%s%s: %s' % (spaces, key, value)
224
225
226 def PrettyPrint(grouped_data, start_date, end_date, indent=0):
227 if not grouped_data:
228 return
229 if isinstance(grouped_data, list):
230 # Print the stats about the list.
231 PrintCommonStats(grouped_data, start_date, end_date, indent)
232 elif isinstance(grouped_data, dict):
233 spaces = ''
234 for _ in range(indent):
235 spaces += ' '
236
237 for field, data in grouped_data.iteritems():
238 print spaces + field
239 PrettyPrint(data, start_date, end_date, indent + 2)
240 else:
241 raise Exception('grouped_data dict should only contain dicts or lists.')
242
243
244 def _SplitListByTryJobType(try_job_data_list):
245 categorized_data_dict = {
246 'compile': [],
247 'test': []
248 }
249 for try_job_data in try_job_data_list:
250 if try_job_data.try_job_type.lower() == 'compile':
251 categorized_data_dict['compile'].append(try_job_data)
252 elif try_job_data.try_job_type.lower() == 'test':
253 categorized_data_dict['test'].append(try_job_data)
254
255 return categorized_data_dict
256
257
258 def _SplitListByMaster(try_job_data_list):
259 categorized_data_dict = defaultdict(list)
260
261 for try_job_data in try_job_data_list:
262 master_name = try_job_data.master_name
263
264 if not master_name:
265 continue
266
267 categorized_data_dict[master_name].append(try_job_data)
268
269 return categorized_data_dict
270
271
272 def _SplitListByBuilder(try_job_data_list):
273 categorized_data_dict = defaultdict(list)
274
275 for try_job_data in try_job_data_list:
276 builder_name = try_job_data.builder_name
277
278 if not builder_name:
279 continue
280
281 categorized_data_dict[builder_name].append(try_job_data)
282
283 return categorized_data_dict
284
285
286 def _SplitListByHeuristicResults(try_job_data_list):
287 categorized_data_dict = {
288 'with heuristic guidance': [],
289 'without heuristic guidance': []
290 }
291 for try_job_data in try_job_data_list:
292 if try_job_data.has_heuristic_results:
293 categorized_data_dict['with heuristic guidance'].append(try_job_data)
294 else:
295 categorized_data_dict['without heuristic guidance'].append(try_job_data)
296 return categorized_data_dict
297
298
299 def _SplitListByCompileTargets(try_job_data_list):
300 categorized_data_dict = {
301 'with compile targets': [],
302 'without compile targets': []
303 }
304 for try_job_data in try_job_data_list:
305 if try_job_data.has_compile_targets:
306 categorized_data_dict['with compile targets'].append(try_job_data)
307 else:
308 categorized_data_dict['without compile targets'].append(try_job_data)
309 return categorized_data_dict
310
311
def SplitListByOption(try_job_data_list, option):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    option: An option with which to split the data by.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data.
  """
  # TODO(lijeffrey): Add support for splitting by platform.
  if option == 't':  # Try job type.
    return _SplitListByTryJobType(try_job_data_list)
  if option == 'm':  # Main waterfall master.
    return _SplitListByMaster(try_job_data_list)
  if option == 'b':  # Main waterfall builder.
    return _SplitListByBuilder(try_job_data_list)
  if option == 'r':  # Whether or not heuristic results are included.
    return _SplitListByHeuristicResults(try_job_data_list)
  if option == 'c':  # Whether or not compile targets are included.
    return _SplitListByCompileTargets(try_job_data_list)

  # Unsupported flag, bail out without modification.
  return try_job_data_list
338
339
def SplitStructByOption(try_job_data_struct, option):
  """Recursively splits every list in the structure by the given option.

  Args:
    try_job_data_struct: A list of WfTryJobData entities, or an arbitrarily
        nested dict whose leaves are such lists.
    option: An option with which to split the data by.

  Returns:
    The structure with each leaf list replaced by its split form.

  Raises:
    Exception: If the structure contains anything other than lists or dicts.
  """
  if isinstance(try_job_data_struct, dict):
    for key in try_job_data_struct:
      try_job_data_struct[key] = SplitStructByOption(
          try_job_data_struct[key], option)
    return try_job_data_struct
  if isinstance(try_job_data_struct, list):
    return SplitListByOption(try_job_data_struct, option)
  raise Exception('try job data dict must only contain lists or dicts.')
350
351
def GetArgsInOrder():
  """Parses the grouping flags and returns them in command-line order.

  Returns:
    A list of flag letters (without the leading dash), ordered exactly as
    they appeared on the command line.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('-t', action='store_true',
                      help='group try job data by type (compile, test)')
  parser.add_argument('-m', action='store_true',
                      help='group try job data by master')
  parser.add_argument('-b', action='store_true',
                      help='group try job data by builder')
  parser.add_argument('-r', action='store_true',
                      help=('group try job data by those with and without '
                            'heuristic results'))
  parser.add_argument('-c', action='store_true',
                      help=('group try job data by those with and without '
                            'compile targets'))

  parsed_flags = vars(parser.parse_args())

  # argparse does not preserve the order flags were given in, so walk the raw
  # command line and keep each flag that parsed as set.
  return [raw_arg[1:] for raw_arg in sys.argv[1:] if parsed_flags[raw_arg[1:]]]
380
381
if __name__ == '__main__':
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Hard-coded reporting window; only try jobs requested in this range are
  # pulled. Adjust these dates to change the report period.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 17)

  # Fetch all try job data entities whose request time falls in the window.
  try_job_data_query = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE)
  categorized_data = try_job_data_query.fetch()

  # Each grouping flag splits the data one level deeper, applied in the order
  # the flags were passed on the command line.
  args = GetArgsInOrder()
  for arg in args:
    categorized_data = SplitStructByOption(categorized_data, arg)

  # TODO(lijeffrey): Display data in an html page instead of printing.
  PrettyPrint(categorized_data, START_DATE, END_DATE)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698