appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py - Issue 1772173002: [Findit] Adding swarming task metadata query page

Unified Diff: appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py

Issue 1772173002: [Findit] Adding swarming task metadata query page (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Cleanup Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py

diff --git a/appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py b/appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py

new file mode 100644

index 0000000000000000000000000000000000000000..78b3676134325141308c9ba1486662ba87dd8865

--- /dev/null

+++ b/appengine/findit/util_scripts/remote_queries/swarming_task_data_query.py

@@ -0,0 +1,356 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Pulls historical swarming task metadata from Findit and prints a report."""

+from collections import defaultdict

+from collections import OrderedDict

+import datetime

+import os

+import sys

+_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)

+sys.path.insert(1, _REMOTE_API_DIR)

+import remote_api

+from model import wf_analysis_status

+from model.wf_swarming_task import WfSwarmingTask

+NOT_AVAILABLE = 'N/A'

+# TODO(lijeffrey): Refactor helper methods into module sharable with

+# try_job_data_query.py.

+def _GetAverageOfNumbersInList(numbers):

+ """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty."""

+ return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE

+def _FormatDigits(number):

+ """Formats number into a 2-digit float, or NOT_AVAILABLE."""

+ if isinstance(number, (int, float)):

+ return float('%.2f' % number)

+ return NOT_AVAILABLE

def _FormatSecondsAsHMS(seconds):
  """Renders a duration in seconds as 'H:MM:SS'.

  Args:
    seconds: A number of seconds, or NOT_AVAILABLE.

  Returns:
    The duration formatted as hours:minutes:seconds, or NOT_AVAILABLE if that
    is what was passed in.
  """
  if seconds != NOT_AVAILABLE:
    whole_minutes, leftover_seconds = divmod(seconds, 60)
    whole_hours, leftover_minutes = divmod(whole_minutes, 60)
    return '%d:%02d:%02d' % (whole_hours, leftover_minutes, leftover_seconds)

  return NOT_AVAILABLE

+def _FormatStepName(step_name):

+ # Formats step_name to return only the first word (the step name itself).

+ # Step names are expected to be in either the format 'step_name' or

+ # 'step_name on platform'.

+ return step_name.strip().split(' ')[0]

+def _CategorizeSwarmingTaskData(swarming_task_list):

+ """Categorizes swarming_task_list into a dict.

+ Args:

+ swarming_task_list: A list of WfSwarmingTask objects.

+ Returns:

+ A dict in the format:

+ {

+ priority1: {

+ master_name1': {

+ 'builder_name1': {

+ 'step_name1': [WfSwarmingTask1, WfSwarmingTask2, ...],

+ ...

+ },

+ ...

+ },

+ ...

+ },

+ ...

+ }

+ """

+ categorized_data = defaultdict(

+ lambda: defaultdict(

+ lambda: defaultdict(list))))

+ for swarming_task in swarming_task_list:

+ if (not swarming_task.parameters or not swarming_task.tests_statuses or

+ swarming_task.status != wf_analysis_status.ANALYZED):

+ # Disregard any swarming tasks that are not yet completed or were

+ # triggered before 'parameters' and 'tests_statuses' were introduced.

+ continue

+ priority = swarming_task.parameters['priority']

+ master_name = swarming_task.master_name

+ builder_name = swarming_task.builder_name

+ step_name = swarming_task.key.id()

+ categorized_data[priority][master_name][builder_name][step_name].append(

+ swarming_task)

+ return categorized_data

def _GetReportInformation(swarming_task_list, start_date, end_date):
  """Computes and returns swarming task metadata in a dict.

  Args:
    swarming_task_list: A list of WfSwarmingTask entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format, in which every value that cannot be
    computed is NOT_AVAILABLE:
    {
        'swarming_tasks_per_day': The average number of swarming tasks
            requested per day over the time period specified.
        'average_execution_time': The average number of seconds spent on each
            swarming task, not including in-queue time.
        'average_time_in_queue': The average number of seconds a swarming
            task spends in-queue before it is picked up.
        'longest_execution_time': The duration of the slowest swarming task,
            formatted by str(datetime.timedelta).
        'shortest_execution_time': The duration of the fastest swarming task,
            formatted by str(datetime.timedelta).
        'tests_times_iterations': The average number of tests run multiplied
            by the average number of iterations.
        'average_number_of_iterations': The average number of iterations each
            test for this step was run.
        'average_number_of_tests_run': The average number of tests run per
            task.
        'error_rate': The fraction of tasks in swarming_task_list whose
            status is wf_analysis_status.ERROR.
    }
  """
  report = {
      'swarming_tasks_per_day': NOT_AVAILABLE,
      'average_execution_time': NOT_AVAILABLE,
      'average_time_in_queue': NOT_AVAILABLE,
      'longest_execution_time': NOT_AVAILABLE,
      'shortest_execution_time': NOT_AVAILABLE,
      'tests_times_iterations': NOT_AVAILABLE,
      'average_number_of_iterations': NOT_AVAILABLE,
      'average_number_of_tests_run': NOT_AVAILABLE,
      'error_rate': NOT_AVAILABLE,
  }

  if not swarming_task_list:
    return report

  task_count = len(swarming_task_list)

  # A window shorter than one day has .days == 0; leave the rate as
  # NOT_AVAILABLE instead of dividing by zero.
  period_in_days = (end_date - start_date).days
  if period_in_days > 0:
    report['swarming_tasks_per_day'] = task_count / float(period_in_days)

  execution_times_seconds = []
  in_queue_times = []
  iteration_counts = []
  tests_counts = []
  error_count = 0

  for swarming_task in swarming_task_list:
    started_time = swarming_task.started_time

    # timedelta.seconds drops whole days, so use total_seconds() to keep
    # durations longer than 24 hours intact.
    # Execution time.
    if started_time and swarming_task.completed_time:
      execution_times_seconds.append(int(
          (swarming_task.completed_time - started_time).total_seconds()))

    # In-queue time.
    if started_time and swarming_task.created_time:
      in_queue_times.append(int(
          (started_time - swarming_task.created_time).total_seconds()))

    # Number of iterations.
    iterations_to_rerun = (swarming_task.parameters or {}).get(
        'iterations_to_rerun')
    if iterations_to_rerun is not None:
      iteration_counts.append(iterations_to_rerun)

    # Number of tests.
    number_of_tests = len(swarming_task.tests_statuses or ())
    if number_of_tests:
      tests_counts.append(number_of_tests)

    # Error rate.
    if swarming_task.status == wf_analysis_status.ERROR:
      error_count += 1

  # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for empty input.
  report['average_execution_time'] = _GetAverageOfNumbersInList(
      execution_times_seconds)
  report['average_time_in_queue'] = _GetAverageOfNumbersInList(in_queue_times)

  if execution_times_seconds:
    report['longest_execution_time'] = str(
        datetime.timedelta(seconds=max(execution_times_seconds)))
    report['shortest_execution_time'] = str(
        datetime.timedelta(seconds=min(execution_times_seconds)))

  average_number_of_iterations = _GetAverageOfNumbersInList(iteration_counts)
  average_number_of_tests_run = _GetAverageOfNumbersInList(tests_counts)
  report['average_number_of_iterations'] = average_number_of_iterations
  report['average_number_of_tests_run'] = average_number_of_tests_run

  # Only multiply when both averages are real numbers; either one may be the
  # NOT_AVAILABLE string.
  if iteration_counts and tests_counts:
    report['tests_times_iterations'] = (
        average_number_of_iterations * average_number_of_tests_run)

  # Force float division so the rate is not truncated to 0 under Python 2.
  # NOTE(review): _CategorizeSwarmingTaskData in this file keeps only ANALYZED
  # tasks, so lists coming from it can never contain ERROR tasks.
  report['error_rate'] = float(error_count) / task_count

  return report

def _GetReport(categorized_swarming_task_dict, start_date, end_date):
  """Builds the swarming task data report, ordered by priority.

  Args:
    categorized_swarming_task_dict: A dict of WfSwarmingTask entities
        organized by priority, master_name, builder_name, step_name, as
        produced by _CategorizeSwarmingTaskData().
    start_date: The earliest request date for which data should be computed.
    end_date: The latest request date for which data should be computed.

  Returns:
    An OrderedDict sorted by ascending priority key (a lower number is a
    higher priority) in the format:
    {
        priority: {
            'master_name': {
                'builder_name': {
                    'step_name': {
                        'swarming_tasks_per_day': number or 'N/A',
                        'average_execution_time': number or 'N/A',
                        'average_time_in_queue': number or 'N/A',
                        'longest_execution_time': number or 'N/A',
                        'shortest_execution_time': number or 'N/A',
                        'tests_times_iterations': number or 'N/A',
                        'average_number_of_tests_run': number or 'N/A',
                        'average_number_of_iterations': number or 'N/A',
                        'error_rate': number or 'N/A'
                    },
                    ...
                },
                ...
            },
            ...
        },
        ...
    }
  """
  def _NewBuilderLevel():
    return defaultdict(dict)

  def _NewMasterLevel():
    return defaultdict(_NewBuilderLevel)

  unsorted_report = defaultdict(_NewMasterLevel)

  for priority, by_master in categorized_swarming_task_dict.iteritems():
    for master_name, by_builder in by_master.iteritems():
      for builder_name, by_step in by_builder.iteritems():
        for step_name, tasks in by_step.iteritems():
          step_reports = unsorted_report[priority][master_name][builder_name]
          step_reports[step_name] = _GetReportInformation(
              tasks, start_date, end_date)

  ordered_report = OrderedDict()
  for priority in sorted(unsorted_report):
    ordered_report[priority] = unsorted_report[priority]
  return ordered_report

def CreateHtmlPage(report, start_date, end_date):
  """Generates an html string for displaying the report.

  One table is emitted per priority, with one row per
  (master, builder, step) combination.

  Args:
    report: A dict containing all the relevant information returned from
        _GetReport().
    start_date: The earliest date that a swarming task was requested.
    end_date: The latest date that a swarming task was requested.

  Returns:
    A string containing the html body for the final report page.
  """
  cell_template = '<td>%s</td>'

  # Collect fragments and join once at the end instead of repeatedly
  # concatenating onto a growing string.
  html_parts = [
      """
      <style>
        table {
          border-collapse: collapse;
          border: 1px solid gray;
        }

        table td, th {
          border: 1px solid gray;
        }
      </style>""",
      '<b>Swarming task metadata from %s to %s (%s days)</b>' % (
          str(start_date), str(end_date), (end_date - start_date).days),
      '<h1>Aggregate metadata for swarming tasks by priority</h1>',
  ]

  for priority, masters in report.items():
    html_parts.append('<h2>Task Priority: %s</h2>' % priority)
    html_parts.append("""
      <table>
        <tr>
          <th>Master</th>
          <th>Builder</th>
          <th>Step</th>
          <th>Average # Tasks Per Day</th>
          <th>Average Time In Queue</th>
          <th>Average Execution Time</th>
          <th>Longest Execution Time</th>
          <th>Shortest Execution Time</th>
          <th># Tests * # Iterations</th>
          <th>Average # Iterations</th>
          <th>Average # Tests Run</th>
          <th>Error Rate</th>
        </tr>""")

    for master_name, builder_reports in masters.items():
      for builder_name, steps in builder_reports.items():
        for step_name, step_report in steps.items():
          # Cell order must match the header row above.
          cells = [
              master_name,
              builder_name,
              _FormatStepName(step_name),
              _FormatDigits(step_report['swarming_tasks_per_day']),
              _FormatSecondsAsHMS(
                  _FormatDigits(step_report['average_time_in_queue'])),
              _FormatSecondsAsHMS(
                  _FormatDigits(step_report['average_execution_time'])),
              step_report['longest_execution_time'],
              step_report['shortest_execution_time'],
              _FormatDigits(step_report['tests_times_iterations']),
              _FormatDigits(step_report['average_number_of_iterations']),
              _FormatDigits(step_report['average_number_of_tests_run']),
              _FormatDigits(step_report['error_rate']),
          ]
          # Every data row is explicitly closed with </tr>.
          html_parts.append('<tr>%s</tr>' % ''.join(
              cell_template % (cell,) for cell in cells))

    html_parts.append('</table>')

  return ''.join(html_parts)

if __name__ == '__main__':
  # Route datastore calls to the live App Engine app through the Remote API.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Reporting window: tasks created on or after START_DATE and before
  # END_DATE.
  START_DATE = datetime.datetime(2016, 2, 1)
  END_DATE = datetime.datetime(2016, 3, 7)

  tasks_in_window = WfSwarmingTask.query(
      WfSwarmingTask.created_time >= START_DATE,
      WfSwarmingTask.created_time < END_DATE).fetch()

  final_report = _GetReport(
      _CategorizeSwarmingTaskData(tasks_in_window), START_DATE, END_DATE)

  # Write to $TMP_DIR when it is set and non-empty, else the working directory.
  output_dir = os.environ.get('TMP_DIR') or os.getcwd()
  report_path = os.path.join(output_dir, 'swarming_task_metadata_report.html')

  with open(report_path, 'w') as report_file:
    report_file.write(CreateHtmlPage(final_report, START_DATE, END_DATE))

  # A parenthesised single argument prints the same text as the print
  # statement under Python 2.
  print('Swarming task metadata report available at file://%s' % report_path)

« no previous file with comments | « no previous file | no next file » | no next file with comments »