Chromium Code Reviews| Index: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py |
| diff --git a/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py b/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..35100b5863f159c0d5ff4d872d93a3af221b2045 |
| --- /dev/null |
| +++ b/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py |
| @@ -0,0 +1,394 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Pulls historical try job metadata from Findit and prints a report.""" |
| + |
| +from collections import defaultdict |
| +import datetime |
| +import os |
| +import sys |
| + |
| +_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) |
| +sys.path.insert(1, _REMOTE_API_DIR) |
| + |
| +import remote_api |
| + |
| +from model.wf_config import FinditConfig |
| +from model.wf_try_job_data import WfTryJobData |
| + |
| + |
# Placeholder reported for any metric that cannot be computed from the
# available try job data (e.g. averages over an empty sample).
NOT_AVAILABLE = 'N/A'
| + |
| + |
| +def _GetAverageOfNumbersInList(numbers): |
| + """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty.""" |
| + return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE |
| + |
| + |
| +def _FormatDigits(number): |
| + """Formats number into a 2-digit float, or NOT_AVAILABLE.""" |
| + if isinstance(number, float): |
| + return float('%.2f' % number) |
| + return NOT_AVAILABLE |
| + |
| + |
def _FormatSecondsAsHMS(seconds):
  """Renders a seconds count as an 'H:MM:SS' string.

  Args:
    seconds: Number of seconds, or NOT_AVAILABLE.

  Returns:
    A string in hours:minutes:seconds form, or NOT_AVAILABLE when the input
    is NOT_AVAILABLE.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  total_minutes, remainder_seconds = divmod(seconds, 60)
  total_hours, remainder_minutes = divmod(total_minutes, 60)
  return '%d:%02d:%02d' % (total_hours, remainder_minutes, remainder_seconds)
| + |
| + |
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
      'try_jobs_per_day': The average number of jobs requested over the time
        period specified,
      'average_regression_range_size': The average number of revisions in the
        regression range when the original failure was detected,
      'average_execution_time': The average amount of time spent on each try
        job not including in-queue time.
      'average_time_in_queue': The average amount of time a try job spends
        in-queue before it is picked up.
      'average_commits_analyzed': The average number of revisions each try job
        needed to run before coming to a conclusion,
      'longest_execution_time': The length of time of the slowest try job,
      'shortest_execution_time': The length of time of the fastest try job,
      'number_of_try_jobs': The number of try jobs in this list,
      'detection_rate': The number of try jobs that found any culprits at all
        regardless of correctness over the total number of try jobs.
      'error_rate': The number of try jobs that had an error / the total
        number of try jobs in the list.
      'time_per_revision': The average amount of execution time spent on each
        revision.
      'under_five_minutes_rate': The number of try jobs that finished under 5
        minutes / total try jobs.
      'under_fifteen_minutes_rate': The number of try jobs that finished in
        under 15 minutes / total try jobs.
      'under_thirty_minutes_rate': The number of try jobs that finished in
        under 30 minutes / total try jobs.
      'over_thirty_minutes_rate': The number of try jobs that finished in over
        30 minutes / total try jobs.
    }
    Metrics that cannot be computed are reported as NOT_AVAILABLE.
  """
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    # Guard against a date range shorter than one day, which would otherwise
    # raise ZeroDivisionError.
    days_in_range = (end_date - start_date).days
    if days_in_range:
      try_jobs_per_day = len(try_job_data_list) / float(days_in_range)

    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (excludes in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_times_seconds.append(
            (try_job_data.end_time - try_job_data.start_time).total_seconds())

      # In-queue time before the try job was picked up.
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_times.append(
            (try_job_data.start_time -
             try_job_data.request_time).total_seconds())

      # Total time end-to-end. This must be based on request_time rather than
      # start_time: the guard only ensures request_time and end_time are set,
      # and end-to-end time is defined as request-to-completion.
      if try_job_data.request_time and try_job_data.end_time:
        total_time_seconds = (
            try_job_data.end_time - try_job_data.request_time).total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection rate.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for empty
    # lists, so no extra conditionals are needed here.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    # commits_analyzed only holds truthy (positive) counts, so the average is
    # nonzero whenever it is available.
    time_per_revision = (average_execution_time / average_commits_analyzed if (
        average_execution_time != NOT_AVAILABLE and
        average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
| + |
| + |
def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
  """Prints the aggregate stats for a list of WfTryJobData entities.

  Each metric is printed on its own line, prefixed with `indent` spaces.
  """
  padding = ' ' * indent
  stats = _GetReportInformation(try_job_data_list, start_date, end_date)
  for name, value in stats.items():
    print('%s%s: %s' % (padding, name, value))
| + |
| + |
def PrettyPrint(grouped_data, start_date, end_date, indent=0):
  """Recursively prints grouped try job data with nested indentation.

  Lists are printed as stat reports; dicts print each group name and recurse
  into its value with two extra spaces of indentation.
  """
  if not grouped_data:
    return

  if isinstance(grouped_data, list):
    # Leaf node: print the stats for this list of entities.
    PrintCommonStats(grouped_data, start_date, end_date, indent)
    return

  if isinstance(grouped_data, dict):
    padding = ' ' * indent
    for group_name, subgroup in grouped_data.items():
      print(padding + group_name)
      PrettyPrint(subgroup, start_date, end_date, indent + 2)
    return

  raise Exception('grouped_data dict should only contain dicts or lists.')
| + |
| + |
| +def _SplitListByTryJobType(try_job_data_list): |
| + categorized_data_dict = { |
| + 'compile': [], |
| + 'test': [] |
| + } |
| + for try_job_data in try_job_data_list: |
| + if try_job_data.try_job_type.lower() == 'compile': |
| + categorized_data_dict['compile'].append(try_job_data) |
| + elif try_job_data.try_job_type.lower() == 'test': |
| + categorized_data_dict['test'].append(try_job_data) |
| + |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByMaster(try_job_data_list): |
| + categorized_data_dict = defaultdict(list) |
| + |
| + for try_job_data in try_job_data_list: |
| + master_name = try_job_data.master_name |
| + |
| + if not master_name: |
| + continue |
| + |
| + categorized_data_dict[master_name].append(try_job_data) |
| + |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByBuilder(try_job_data_list): |
| + categorized_data_dict = defaultdict(list) |
| + |
| + for try_job_data in try_job_data_list: |
| + builder_name = try_job_data.builder_name |
| + |
| + if not builder_name: |
| + continue |
| + |
| + categorized_data_dict[builder_name].append(try_job_data) |
| + |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByHeuristicResults(try_job_data_list): |
| + categorized_data_dict = { |
| + 'With heuristic guidance': [], |
| + 'Without heuristic guidance': [] |
| + } |
| + for try_job_data in try_job_data_list: |
| + if try_job_data.has_heuristic_results: |
| + categorized_data_dict['With heuristic guidance'].append(try_job_data) |
| + else: |
| + categorized_data_dict['Without heuristic guidance'].append(try_job_data) |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByCompileTargets(try_job_data_list): |
| + categorized_data_dict = { |
| + 'With compile targets': [], |
| + 'Without compile targets': [] |
| + } |
| + for try_job_data in try_job_data_list: |
| + if try_job_data.has_compile_targets: |
| + categorized_data_dict['With compile targets'].append(try_job_data) |
| + else: |
| + categorized_data_dict['Without compile targets'].append(try_job_data) |
| + return categorized_data_dict |
| + |
| + |
def SplitListByArg(try_job_data_list, arg):
  """Separates a WfTryJobData list into a dict keyed by the given flag.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    arg: The command-line flag selecting the grouping. Supported values:
      --type: Group by try job type ('compile', 'test').
      --per-master: Group by WfTryJobData.master_name.
      --per-builder: Group by master, then by WfTryJobData.builder_name.
      --has-heuristic-results: Separate by whether heuristic results were
        passed to the try job.
      --has-compile-targets: Separate by whether compile targets were passed
        to the try job. Note this field is not meaningful for test try jobs.

  Returns:
    A dict mapping each group label to its list of entities, or the input
    list unchanged when the flag is not recognized.
  """
  if arg == '--type':
    return _SplitListByTryJobType(try_job_data_list)

  if arg == '--per-master':
    return _SplitListByMaster(try_job_data_list)

  if arg == '--per-builder':
    # Builders are tied to masters, so group by master first and then split
    # each master's list by builder.
    by_master = _SplitListByMaster(try_job_data_list)
    for master_name in list(by_master):
      by_master[master_name] = _SplitListByBuilder(by_master[master_name])
    return by_master

  if arg == '--has-heuristic-results':
    return _SplitListByHeuristicResults(try_job_data_list)

  if arg == '--has-compile-targets':
    return _SplitListByCompileTargets(try_job_data_list)

  # TODO(lijeffrey): Add support for splitting by platform.

  # Unsupported flag, bail out without modification.
  return try_job_data_list
| + |
| + |
def SplitStructByArg(try_job_data_struct, arg):
  """Recursively applies SplitListByArg to every list in the structure.

  Dicts are updated in place; lists are replaced by their grouped dict.
  Raises if the structure contains anything other than lists or dicts.
  """
  if isinstance(try_job_data_struct, list):
    return SplitListByArg(try_job_data_struct, arg)

  if isinstance(try_job_data_struct, dict):
    for key in list(try_job_data_struct):
      try_job_data_struct[key] = SplitStructByArg(
          try_job_data_struct[key], arg)
    return try_job_data_struct

  raise Exception('try job data dict must only contain lists or dicts.')
| + |
| + |
def GetArgs():
  """Returns the grouping flags passed on the command line.

  Collapses redundant flag combinations before returning.
  """
  flags = sys.argv[1:]

  # Splitting per builder already groups per master first, so drop the
  # redundant --per-master flag to avoid grouping by master twice.
  if '--per-master' in flags and '--per-builder' in flags:
    flags.remove('--per-master')

  return flags
| + |
| + |
if __name__ == '__main__':
  # Set up the Remote API so queries run against the live App Engine app.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 8)

  # Fetch all try job data requested within the reporting window.
  categorized_data = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE).fetch()

  # Each command-line flag further subdivides the fetched data.
  for arg in GetArgs():
    categorized_data = SplitStructByArg(categorized_data, arg)

  PrettyPrint(categorized_data, START_DATE, END_DATE)