Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1012)

Unified Diff: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py

Issue 2056623002: [Findit] Adding script for custom try job queries (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py
diff --git a/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py b/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..35100b5863f159c0d5ff4d872d93a3af221b2045
--- /dev/null
+++ b/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py
@@ -0,0 +1,394 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Pulls historical try job metadata from Findit and prints a report."""
+
+from collections import defaultdict
+import datetime
+import os
+import sys
+
+_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
+sys.path.insert(1, _REMOTE_API_DIR)
+
+import remote_api
+
+from model.wf_config import FinditConfig
+from model.wf_try_job_data import WfTryJobData
+
+
+NOT_AVAILABLE = 'N/A'
+
+
+def _GetAverageOfNumbersInList(numbers):
+ """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""
+ return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE
+
+
+def _FormatDigits(number):
+ """Formats number into a 2-digit float, or NOT_AVAILABLE."""
+ if isinstance(number, float):
+ return float('%.2f' % number)
+ return NOT_AVAILABLE
+
+
def _FormatSecondsAsHMS(seconds):
  """Renders a number of seconds as an 'H:MM:SS' string.

  Args:
    seconds: A numeric seconds value, or NOT_AVAILABLE.

  Returns:
    An 'H:MM:SS' string, or NOT_AVAILABLE if seconds is NOT_AVAILABLE.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  hours, remainder = divmod(seconds, 3600)
  minutes, secs = divmod(remainder, 60)
  return '%d:%02d:%02d' % (hours, minutes, secs)
+
+
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
      'try_jobs_per_day': The average number of jobs requested over the time
          period specified,
      'average_regression_range_size': The average number of revisions in the
          regression range when the original failure was detected,
      'average_execution_time': The average amount of time spent on each try
          job not including in-queue time.
      'average_time_in_queue': The average amount of time a try job spends
          in-queue before it is picked up.
      'average_commits_analyzed': The average number of revisions each try job
          needed to run before coming to a conclusion,
      'longest_execution_time': The length of time of the slowest try job,
      'shortest_execution_time': The length of time of the fastest try job,
      'number_of_try_jobs': The number of try jobs in this list,
      'detection_rate': The number of try jobs that found any culprits at all
          regardless of correctness over the total number of try jobs.
      'error_rate': The number of try jobs that had an error / the total
          number of try jobs in the list.
      'time_per_revision': The average amount of execution time spent on each
          revision.
      'under_five_minutes_rate': The number of try jobs that finished under 5
          minutes / total try jobs.
      'under_fifteen_minutes_rate': The number of try jobs that finished in
          under 15 minutes / total try jobs.
      'under_thirty_minutes_rate': The number of try jobs that finished in
          under 30 minutes / total try jobs.
      'over_thirty_minutes_rate': The number of try jobs that finished in over
          30 minutes / total try jobs.
    }
  """
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    total_number_of_try_jobs = len(try_job_data_list)

    # Guard against a zero-day range to avoid ZeroDivisionError when
    # start_date and end_date fall on the same day.
    days_in_range = (end_date - start_date).days
    if days_in_range:
      try_jobs_per_day = total_number_of_try_jobs / float(days_in_range)

    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (excludes in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_times_seconds.append(
            (try_job_data.end_time - try_job_data.start_time).total_seconds())

      # In-queue time.
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_times.append(
            (try_job_data.start_time -
             try_job_data.request_time).total_seconds())

      # Total time end-to-end. Measured from request_time (not start_time) so
      # it includes in-queue time and matches the fields checked by the guard;
      # start_time may be unset even when request_time and end_time are set.
      if try_job_data.request_time and try_job_data.end_time:
        total_time_seconds = (
            try_job_data.end_time - try_job_data.request_time).total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection and error counts.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for empty
    # lists, so no extra emptiness guards are needed around these calls.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    # commits_analyzed only holds truthy (non-zero) values, so the average is
    # strictly positive whenever it is available.
    time_per_revision = (average_execution_time / average_commits_analyzed if (
        average_execution_time != NOT_AVAILABLE and
        average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
+
+
+def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
+ """Takes a list of WfTryJobData entities and prints their stats."""
+ spaces = ''
+ for _ in range(indent):
+ spaces += ' '
+
+ report_info = _GetReportInformation(try_job_data_list, start_date, end_date)
+ for key, value in report_info.iteritems():
+ print '%s%s: %s' % (spaces, key, value)
+
+
+def PrettyPrint(grouped_data, start_date, end_date, indent=0):
+ if not grouped_data:
+ return
+ if isinstance(grouped_data, list):
+ # Print the stats about the list.
+ PrintCommonStats(grouped_data, start_date, end_date, indent)
+ elif isinstance(grouped_data, dict):
+ spaces = ''
+ for _ in range(indent):
+ spaces += ' '
+
+ for field, data in grouped_data.iteritems():
+ print spaces + field
+ PrettyPrint(data, start_date, end_date, indent + 2)
+ else:
+ raise Exception('grouped_data dict should only contain dicts or lists.')
+
+
+def _SplitListByTryJobType(try_job_data_list):
+ categorized_data_dict = {
+ 'compile': [],
+ 'test': []
+ }
+ for try_job_data in try_job_data_list:
+ if try_job_data.try_job_type.lower() == 'compile':
+ categorized_data_dict['compile'].append(try_job_data)
+ elif try_job_data.try_job_type.lower() == 'test':
+ categorized_data_dict['test'].append(try_job_data)
+
+ return categorized_data_dict
+
+
+def _SplitListByMaster(try_job_data_list):
+ categorized_data_dict = defaultdict(list)
+
+ for try_job_data in try_job_data_list:
+ master_name = try_job_data.master_name
+
+ if not master_name:
+ continue
+
+ categorized_data_dict[master_name].append(try_job_data)
+
+ return categorized_data_dict
+
+
+def _SplitListByBuilder(try_job_data_list):
+ categorized_data_dict = defaultdict(list)
+
+ for try_job_data in try_job_data_list:
+ builder_name = try_job_data.builder_name
+
+ if not builder_name:
+ continue
+
+ categorized_data_dict[builder_name].append(try_job_data)
+
+ return categorized_data_dict
+
+
+def _SplitListByHeuristicResults(try_job_data_list):
+ categorized_data_dict = {
+ 'With heuristic guidance': [],
+ 'Without heuristic guidance': []
+ }
+ for try_job_data in try_job_data_list:
+ if try_job_data.has_heuristic_results:
+ categorized_data_dict['With heuristic guidance'].append(try_job_data)
+ else:
+ categorized_data_dict['Without heuristic guidance'].append(try_job_data)
+ return categorized_data_dict
+
+
+def _SplitListByCompileTargets(try_job_data_list):
+ categorized_data_dict = {
+ 'With compile targets': [],
+ 'Without compile targets': []
+ }
+ for try_job_data in try_job_data_list:
+ if try_job_data.has_compile_targets:
+ categorized_data_dict['With compile targets'].append(try_job_data)
+ else:
+ categorized_data_dict['Without compile targets'].append(try_job_data)
+ return categorized_data_dict
+
+
def SplitListByArg(try_job_data_list, arg):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    arg: An argument with which to split the data by. Options include:
      --type: To categorize by try job type ('compile', 'test').
      --per-master: To categorize by WfTryJobData.master_name.
      --per-builder: To categorize by WfTryJobData.builder_name (grouped
        under each master).
      --has-heuristic-results: To separate by whether or not heuristic results
        were passed to the try job.
      --has-compile-targets: To separate by whether or not compile targets
        were passed to the try job. Note this field is not meaningful for
        test try jobs.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data, or the original list when the
    flag is unsupported.
  """
  if arg == '--per-builder':
    # Builders are scoped to masters, so group by master first, then split
    # each master's data by builder.
    per_master = _SplitListByMaster(try_job_data_list)

    for master_name in per_master.keys():
      per_master[master_name] = _SplitListByBuilder(per_master[master_name])
    return per_master

  # Dispatch table for the single-level splits.
  splitters = {
      '--type': _SplitListByTryJobType,
      '--per-master': _SplitListByMaster,
      '--has-heuristic-results': _SplitListByHeuristicResults,
      '--has-compile-targets': _SplitListByCompileTargets,
  }
  # TODO(lijeffrey): Add support for splitting by platform.

  if arg in splitters:
    return splitters[arg](try_job_data_list)

  # Unsupported flag, bail out without modification.
  return try_job_data_list
+
+
def SplitStructByArg(try_job_data_struct, arg):
  """Recursively applies SplitListByArg to every list in the structure.

  Args:
    try_job_data_struct: A list of WfTryJobData entities, or a dict whose
        values are (recursively) such lists or dicts.
    arg: The splitting flag to pass to SplitListByArg.

  Returns:
    The structure with every leaf list replaced by its split form. Dicts are
    updated in place.

  Raises:
    Exception: If the structure contains anything other than lists or dicts.
  """
  if isinstance(try_job_data_struct, list):
    return SplitListByArg(try_job_data_struct, arg)

  if isinstance(try_job_data_struct, dict):
    for key in try_job_data_struct.keys():
      try_job_data_struct[key] = SplitStructByArg(
          try_job_data_struct[key], arg)
    return try_job_data_struct

  raise Exception('try job data dict must only contain lists or dicts.')
+
+
def GetArgs(argv=None):
  """Returns the data-splitting flags to apply, normalized.

  Args:
    argv: An optional list of command-line flags; defaults to sys.argv[1:].
        The parameter exists so callers (and tests) can supply flags without
        touching sys.argv.

  Returns:
    The list of flags, with '--per-master' dropped whenever '--per-builder'
    is also present.
  """
  args = list(argv) if argv is not None else sys.argv[1:]

  if '--per-master' in args and '--per-builder' in args:
    # Categorizing by builder categorizes per master regardless. Remove
    # per master to avoid doing it twice.
    args.remove('--per-master')

  return args
+
+
if __name__ == '__main__':
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # The request-time window to report on: entities with request_time in
  # [START_DATE, END_DATE) are included.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 8)

  # Fetch all try job data entities whose request time falls in the window.
  try_job_data_query = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE)
  categorized_data = try_job_data_query.fetch()

  args = GetArgs()

  # Each command-line flag splits the data one level deeper (e.g. first by
  # type, then by master), producing nested dicts with lists at the leaves.
  for arg in args:
    categorized_data = SplitStructByArg(categorized_data, arg)

  PrettyPrint(categorized_data, START_DATE, END_DATE)
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698