Chromium Code Reviews| Index: appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py |
| diff --git a/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py b/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..35100b5863f159c0d5ff4d872d93a3af221b2045 |
| --- /dev/null |
| +++ b/appengine/findit/util_scripts/remote_queries/try_job_data_metrics.py |
| @@ -0,0 +1,394 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Pulls historical try job metadata from Findit and prints a report.""" |
| + |
| +from collections import defaultdict |
| +import datetime |
| +import os |
| +import sys |
| + |
| +_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) |
| +sys.path.insert(1, _REMOTE_API_DIR) |
| + |
| +import remote_api |
| + |
| +from model.wf_config import FinditConfig |
| +from model.wf_try_job_data import WfTryJobData |
| + |
| + |
# Placeholder reported for any metric that cannot be computed from the
# available try job data (e.g. averages over an empty sample).
NOT_AVAILABLE = 'N/A'
| + |
| + |
| +def _GetAverageOfNumbersInList(numbers): |
| + """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty.""" |
| + return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE |
| + |
| + |
| +def _FormatDigits(number): |
| + """Formats number into a 2-digit float, or NOT_AVAILABLE.""" |
| + if isinstance(number, float): |
| + return float('%.2f' % number) |
| + return NOT_AVAILABLE |
| + |
| + |
def _FormatSecondsAsHMS(seconds):
  """Renders a seconds count as an 'H:MM:SS' string.

  Args:
    seconds: Number of seconds, or NOT_AVAILABLE.

  Returns:
    A string in hours:minutes:seconds form, or NOT_AVAILABLE when the input
    is NOT_AVAILABLE.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  total_minutes, remainder_seconds = divmod(seconds, 60)
  total_hours, remainder_minutes = divmod(total_minutes, 60)
  return '%d:%02d:%02d' % (total_hours, remainder_minutes, remainder_seconds)
| + |
| + |
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
      'try_jobs_per_day': The average number of jobs requested over the time
        period specified,
      'average_regression_range_size': The average number of revisions in the
        regression range when the original failure was detected,
      'average_execution_time': The average amount of time spent on each try
        job not including in-queue time.
      'average_time_in_queue': The average amount of time a try job spends
        in-queue before it is picked up.
      'average_commits_analyzed': The average number of revisions each try job
        needed to run before coming to a conclusion,
      'longest_execution_time': The length of time of the slowest try job,
      'shortest_execution_time': The length of time of the fastest try job,
      'number_of_try_jobs': The number of try jobs in this list,
      'detection_rate': The number of try jobs that found any culprits at all
        regardless of correctness over the total number of try jobs.
      'error_rate': The number of try jobs that had an error / the total
        number of try jobs in the list.
      'time_per_revision': The average amount of execution time spent on each
        revision.
      'under_five_minutes_rate': The number of try jobs that finished under 5
        minutes / total try jobs.
      'under_fifteen_minutes_rate': The number of try jobs that finished in
        under 15 minutes / total try jobs.
      'under_thirty_minutes_rate': The number of try jobs that finished in
        under 30 minutes / total try jobs.
      'over_thirty_minutes_rate': The number of try jobs that finished in over
        30 minutes / total try jobs.
    }
    Metrics that cannot be computed are reported as NOT_AVAILABLE.
  """
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    # Guard against a date range shorter than one day, which would otherwise
    # raise ZeroDivisionError.
    days_in_range = (end_date - start_date).days
    if days_in_range:
      try_jobs_per_day = len(try_job_data_list) / float(days_in_range)

    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (excludes in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_times_seconds.append(
            (try_job_data.end_time - try_job_data.start_time).total_seconds())

      # In-queue time before the try job was picked up.
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_times.append(
            (try_job_data.start_time -
             try_job_data.request_time).total_seconds())

      # Total time end-to-end. This must be based on request_time rather than
      # start_time: the guard only ensures request_time and end_time are set,
      # and end-to-end time is defined as request-to-completion.
      if try_job_data.request_time and try_job_data.end_time:
        total_time_seconds = (
            try_job_data.end_time - try_job_data.request_time).total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection rate.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for empty
    # lists, so no extra conditionals are needed here.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    # commits_analyzed only holds truthy (positive) counts, so the average is
    # nonzero whenever it is available.
    time_per_revision = (average_execution_time / average_commits_analyzed if (
        average_execution_time != NOT_AVAILABLE and
        average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
| + |
| + |
def PrintCommonStats(try_job_data_list, start_date, end_date, indent):
  """Prints the aggregate stats for a list of WfTryJobData entities.

  Each metric is printed on its own line, prefixed with `indent` spaces.
  """
  padding = ' ' * indent
  stats = _GetReportInformation(try_job_data_list, start_date, end_date)
  for name, value in stats.items():
    print('%s%s: %s' % (padding, name, value))
| + |
| + |
def PrettyPrint(grouped_data, start_date, end_date, indent=0):
  """Recursively prints grouped try job data with nested indentation.

  Lists are printed as stat reports; dicts print each group name and recurse
  into its value with two extra spaces of indentation.
  """
  if not grouped_data:
    return

  if isinstance(grouped_data, list):
    # Leaf node: print the stats for this list of entities.
    PrintCommonStats(grouped_data, start_date, end_date, indent)
    return

  if isinstance(grouped_data, dict):
    padding = ' ' * indent
    for group_name, subgroup in grouped_data.items():
      print(padding + group_name)
      PrettyPrint(subgroup, start_date, end_date, indent + 2)
    return

  raise Exception('grouped_data dict should only contain dicts or lists.')
| + |
| + |
| +def _SplitListByTryJobType(try_job_data_list): |
| + categorized_data_dict = { |
| + 'compile': [], |
| + 'test': [] |
| + } |
| + for try_job_data in try_job_data_list: |
| + if try_job_data.try_job_type.lower() == 'compile': |
| + categorized_data_dict['compile'].append(try_job_data) |
| + elif try_job_data.try_job_type.lower() == 'test': |
| + categorized_data_dict['test'].append(try_job_data) |
| + |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByMaster(try_job_data_list): |
| + categorized_data_dict = defaultdict(list) |
| + |
| + for try_job_data in try_job_data_list: |
| + master_name = try_job_data.master_name |
| + |
| + if not master_name: |
| + continue |
| + |
| + categorized_data_dict[master_name].append(try_job_data) |
| + |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByBuilder(try_job_data_list): |
| + categorized_data_dict = defaultdict(list) |
| + |
| + for try_job_data in try_job_data_list: |
| + builder_name = try_job_data.builder_name |
| + |
| + if not builder_name: |
| + continue |
| + |
| + categorized_data_dict[builder_name].append(try_job_data) |
| + |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByHeuristicResults(try_job_data_list): |
| + categorized_data_dict = { |
| + 'With heuristic guidance': [], |
| + 'Without heuristic guidance': [] |
| + } |
| + for try_job_data in try_job_data_list: |
| + if try_job_data.has_heuristic_results: |
| + categorized_data_dict['With heuristic guidance'].append(try_job_data) |
| + else: |
| + categorized_data_dict['Without heuristic guidance'].append(try_job_data) |
| + return categorized_data_dict |
| + |
| + |
| +def _SplitListByCompileTargets(try_job_data_list): |
| + categorized_data_dict = { |
| + 'With compile targets': [], |
| + 'Without compile targets': [] |
| + } |
| + for try_job_data in try_job_data_list: |
| + if try_job_data.has_compile_targets: |
| + categorized_data_dict['With compile targets'].append(try_job_data) |
| + else: |
| + categorized_data_dict['Without compile targets'].append(try_job_data) |
| + return categorized_data_dict |
| + |
| + |
def SplitListByArg(try_job_data_list, arg):
  """Separates a WfTryJobData list into a dict keyed by the given flag.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    arg: The command-line flag selecting the grouping. Supported values:
      --type: Group by try job type ('compile', 'test').
      --per-master: Group by WfTryJobData.master_name.
      --per-builder: Group by master, then by WfTryJobData.builder_name.
      --has-heuristic-results: Separate by whether heuristic results were
        passed to the try job.
      --has-compile-targets: Separate by whether compile targets were passed
        to the try job. Note this field is not meaningful for test try jobs.

  Returns:
    A dict mapping each group label to its list of entities, or the input
    list unchanged when the flag is not recognized.
  """
  if arg == '--type':
    return _SplitListByTryJobType(try_job_data_list)

  if arg == '--per-master':
    return _SplitListByMaster(try_job_data_list)

  if arg == '--per-builder':
    # Builders are tied to masters, so group by master first and then split
    # each master's list by builder.
    by_master = _SplitListByMaster(try_job_data_list)
    for master_name in list(by_master):
      by_master[master_name] = _SplitListByBuilder(by_master[master_name])
    return by_master

  if arg == '--has-heuristic-results':
    return _SplitListByHeuristicResults(try_job_data_list)

  if arg == '--has-compile-targets':
    return _SplitListByCompileTargets(try_job_data_list)

  # TODO(lijeffrey): Add support for splitting by platform.

  # Unsupported flag, bail out without modification.
  return try_job_data_list
| + |
| + |
def SplitStructByArg(try_job_data_struct, arg):
  """Recursively applies SplitListByArg to every list in the structure.

  Dicts are updated in place; lists are replaced by their grouped dict.
  Raises if the structure contains anything other than lists or dicts.
  """
  if isinstance(try_job_data_struct, list):
    return SplitListByArg(try_job_data_struct, arg)

  if isinstance(try_job_data_struct, dict):
    for key in list(try_job_data_struct):
      try_job_data_struct[key] = SplitStructByArg(
          try_job_data_struct[key], arg)
    return try_job_data_struct

  raise Exception('try job data dict must only contain lists or dicts.')
| + |
| + |
def GetArgs():
  """Returns the grouping flags passed on the command line.

  Collapses redundant flag combinations before returning.
  """
  flags = sys.argv[1:]

  # Splitting per builder already groups per master first, so drop the
  # redundant --per-master flag to avoid grouping by master twice.
  if '--per-master' in flags and '--per-builder' in flags:
    flags.remove('--per-master')

  return flags
| + |
| + |
if __name__ == '__main__':
  # Set up the Remote API so queries run against the live App Engine app.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 8)

  # Fetch all try job data requested within the reporting window.
  categorized_data = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE).fetch()

  # Each command-line flag further subdivides the fetched data.
  for arg in GetArgs():
    categorized_data = SplitStructByArg(categorized_data, arg)

  PrettyPrint(categorized_data, START_DATE, END_DATE)