appengine/findit/util_scripts/remote_queries/try_job_data_query.py - Issue 1777473003: [Findit] Separating compile from test data in report script

Unified Diff: appengine/findit/util_scripts/remote_queries/try_job_data_query.py

Issue 1777473003: [Findit] Separating compile from test data in report script (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: appengine/findit/util_scripts/remote_queries/try_job_data_query.py

diff --git a/appengine/findit/util_scripts/remote_queries/try_job_data_query.py b/appengine/findit/util_scripts/remote_queries/try_job_data_query.py

index 6c155c4379be3b2ee28aaace210846884827742c..6f03a5edd9738f1a589575ca30bf979ac4f4879c 100644

--- a/appengine/findit/util_scripts/remote_queries/try_job_data_query.py

+++ b/appengine/findit/util_scripts/remote_queries/try_job_data_query.py

@@ -43,26 +43,34 @@ def _FormatSecondsAsHMS(seconds):

return '%d:%02d:%02d' % (hours, minutes, seconds)

-def _CategorizeTryJobDataByMasterAndBuilder(try_job_data_list):

- """Categorizes try_job_data_list into a dict by master_name and builder_name.

+def _CategorizeTryJobData(try_job_data_list):

+ """Categorizes try_job_data_list into a dict.

Args:

try_job_data_list: A list of WfTryJobData objects.

Returns:

A dict in the format:

{

- 'master_name1': {

- 'builder_name1': [WfTryJobData1, WfTryJobData2, ...],

- 'builder_name2': [WfTryJobData3, ...]

+ 'compile': {

+ 'master_name1': {

+ 'builder_name1': [WfTryJobData1, WfTryJobData2, ...],

+ 'builder_name2': [WfTryJobData3, ...]

+ },

+ 'master_name2': {

+ ...

+ }

- 'master_name2: {

- ...

+ 'test': {

+ ...

}

"""

- categorized_data = defaultdict(lambda: defaultdict(list))

+ categorized_data = {

+ 'compile': defaultdict(lambda: defaultdict(list)),

+ 'test': defaultdict(lambda: defaultdict(list))

+ }

for try_job_data in try_job_data_list:

master_name = try_job_data.master_name

builder_name = try_job_data.builder_name

@@ -70,7 +78,10 @@ def _CategorizeTryJobDataByMasterAndBuilder(try_job_data_list):

if not master_name or not builder_name:

continue

- categorized_data[master_name][builder_name].append(try_job_data)

+ try_job_type = try_job_data.try_job_type

+ categorized_data[try_job_type][master_name][builder_name].append(

+ try_job_data)

return categorized_data

@@ -98,8 +109,7 @@ def _GetReportInformation(try_job_data_list, start_date, end_date):

needed to run before coming to a conclusion,

'longest_execution_time': The length of time of the slowest try job,

'shortest_execution_time': The length of time of the fastest try job,

- 'number_of_compile_try_jobs': The number of try jobs for 'compile',

- 'number_of_test_try_jobs': The number of try jobs for 'test',

+ 'number_of_try_jobs': The number of try jobs in this list,

'detection_rate': The number of try jobs that found any culprits at all

regardless of correctness over the total number of try jobs.

}

@@ -111,11 +121,8 @@ def _GetReportInformation(try_job_data_list, start_date, end_date):

average_commits_analyzed = NOT_AVAILABLE

longest_execution_time = NOT_AVAILABLE

shortest_execution_time = NOT_AVAILABLE

- compile_try_jobs = NOT_AVAILABLE

- test_try_jobs = NOT_AVAILABLE

detection_rate = NOT_AVAILABLE

- compile_try_jobs = NOT_AVAILABLE

- test_try_jobs = NOT_AVAILABLE

+ number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0

if try_job_data_list:

try_jobs_per_day = (

@@ -125,8 +132,6 @@ def _GetReportInformation(try_job_data_list, start_date, end_date):

in_queue_times = []

commits_analyzed = []

culprits_detected = 0

- compile_try_jobs = 0

- test_try_jobs = 0

for try_job_data in try_job_data_list:

# Regression range size.

@@ -151,12 +156,6 @@ def _GetReportInformation(try_job_data_list, start_date, end_date):

if try_job_data.culprits:

culprits_detected += 1

- # Try job types.

- if try_job_data.try_job_type == 'compile':

- compile_try_jobs += 1

- elif try_job_data.try_job_type == 'test':

- test_try_jobs += 1

average_regression_range_size = _GetAverageOfNumbersInList(

regression_range_sizes)

average_execution_time = (_GetAverageOfNumbersInList(

@@ -182,23 +181,22 @@ def _GetReportInformation(try_job_data_list, start_date, end_date):

'average_commits_analyzed': average_commits_analyzed,

'longest_execution_time': longest_execution_time,

'shortest_execution_time': shortest_execution_time,

- 'number_of_compile_try_jobs': compile_try_jobs,

- 'number_of_test_try_jobs': test_try_jobs,

+ 'number_of_try_jobs': number_of_try_jobs,

'detection_rate': detection_rate

}

def _GetReportListForMastersAndBuilders(supported_masters_to_builders,

- sorted_try_job_data_dict, start_date,

- end_date):

+ categorized_data_dict,

+ start_date, end_date):

"""Gets a full try job data report of each master and builder.

Args:

supported_masters_to_builders: Findit's config for mapping masters to

builders.

- sorted_try_job_data_dict: A 2-layer dict mapping masters to builders

- and builders to lists of WfTryJobData objects. This dict should be the

- output of _SortTryJobDataByMasterAndBuilder().

+ categorized_data_dict: A dict mapping try job types to masters, masters to

+ builders, and builders to lists of WfTryJobData objects. This dict should

+ be the output of _CategorizeTryJobData().

start_date: The earliest request date for which data should be computed.

end_date: The latest request date for which data should be computed.

@@ -208,57 +206,66 @@ def _GetReportListForMastersAndBuilders(supported_masters_to_builders,

format [supported_dict, unsupported_dict]. All supported masters and

builders are accounted for even if there is no data, and all available data

is displayed even if support for it has been deprecated.

[

{

- 'master_name': {

- 'builder_name': {

- 'try_jobs_per_day': 1 or 'N/A',

- 'average_regression_range_size': 1 or 'N/A',

- 'average_execution_time': 1 or 'N/A',

- 'average_time_in_queue': 1 or 'N/A',

- 'average_commits_analyzed': 1 or 'N/A',

- 'longest_execution_time': 1 or 'N/A',

- 'shortest_execution_time': 1 or 'N/A',

- 'number_of_compile_try_jobs': 1 or 'N/A',

- 'number_of_test_try_jobs': 1 or 'N/A',

- 'detection_rate': 0.0-1.0 or 'N/A',

- },

- ...

- },

+ 'compile': {

+ 'master_name': {

+ 'builder_name': {

+ 'try_jobs_per_day': 1 or 'N/A',

+ 'average_regression_range_size': 1 or 'N/A',

+ 'average_execution_time': 1 or 'N/A',

+ 'average_time_in_queue': 1 or 'N/A',

+ 'average_commits_analyzed': 1 or 'N/A',

+ 'longest_execution_time': 1 or 'N/A',

+ 'shortest_execution_time': 1 or 'N/A',

+ 'number_of_try_jobs': 1 or 'N/A',

+ 'detection_rate': 0.0-1.0 or 'N/A',

+ },

+ ...

+ },

+ 'test': {

+ ...

+ }

...

{

- 'master_name': {

- 'builder_name': {

- ...

- }

+ 'compile': ...,

+ 'test': ...

}

]

"""

- supported = {}

- unsupported = {}

+ supported = defaultdict(dict)

+ unsupported = defaultdict(dict)

report = [supported, unsupported]

# Build the supported report according to Findit's config.

for master, builders in supported_masters_to_builders.iteritems():

- supported[master] = {}

+ supported['compile'][master] = {}

+ supported['test'][master] = {}

for builder in builders:

- try_job_data_list = sorted_try_job_data_dict.get(master, {}).get(builder)

- supported[master][builder] = _GetReportInformation(

- try_job_data_list, start_date, end_date)

+ compile_try_job_data_list = (

+ categorized_data_dict.get('compile').get(master, {}).get(builder))

+ test_try_job_data_list = (

+ categorized_data_dict.get('test').get(master, {}).get(builder))

+ supported['compile'][master][builder] = _GetReportInformation(

+ compile_try_job_data_list, start_date, end_date)

+ supported['test'][master][builder] = _GetReportInformation(

+ test_try_job_data_list, start_date, end_date)

# Build the unsupported report according to what's in the try job data list

# but not found in Findit's config.

- for master, builders in sorted_try_job_data_dict.iteritems():

- unsupported[master] = {}

- for builder in builders:

- if not supported_masters_to_builders.get(master, {}).get(builder):

- try_job_data_list = sorted_try_job_data_dict[master][builder]

- unsupported[master][builder] = _GetReportInformation(

- try_job_data_list, start_date, end_date)

+ for try_job_type, try_job_data_dict in categorized_data_dict.iteritems():

+ for master, builders in try_job_data_dict.iteritems():

+ unsupported[try_job_type][master] = {}

+ for builder in builders:

+ if not supported_masters_to_builders.get(master, {}).get(builder):

+ try_job_data_list = (

+ categorized_data_dict[try_job_type][master][builder])

+ unsupported[try_job_type][master][builder] = _GetReportInformation(

+ try_job_data_list, start_date, end_date)

return report

@@ -290,62 +297,69 @@ def CreateHtmlPage(report_list, start_date, end_date):

html += '<b>Try job metadata from %s to %s (%s days)</b>' % (

str(start_date), str(end_date), (end_date - start_date).days)

- html += """

- <h1>Aggregate metadata for try jobs per master/builder</h1>

- <table>

- <tr>

- <th>Master</th>

- <th>Builder</th>

- <th># Try Jobs Per Day</th>

- <th>Average Regression Range Size</th>

- <th>Average Number Of Revisions Analyzed</th>

- <th>Average Time In Queue (HH:MM:SS)</th>

- <th>Average Execution Time (HH:MM:SS)</th>

- <th>Longest Execution Time (HH:MM:SS)</th>

- <th>Shortest Execution Time (HH:MM:SS)</th>

- <th>Culprit Detection Rate</th>

- <th>Number of Compile Try Jobs</th>

- <th>Number of Test Try Jobs</th>

- </tr>"""

+ html += '<h1>Aggregate metadata for try jobs per master/builder</h1>'

for i in range(len(report_list)):

- cell_template = '<td>%s</td>' if i == 0 else '<td bgcolor="#CCCCCC">%s</td>'

+ if i == 0: # Supported dict.

+ html += '<h2>Supported Masters/Builders</h2>'

+ cell_template = '<td>%s</td>'

+ else: # Unsupported dict.

+ html += '<h2>Unsupported Masters/Builders</h2>'

+ cell_template = '<td bgcolor="#CCCCCC">%s</td>'

report = report_list[i]

- for master_name, builder_reports in report.iteritems():

- for builder_name in builder_reports:

- builder_report = report[master_name][builder_name]

- html += '<tr>'

- html += cell_template % master_name

- html += cell_template % builder_name

- html += cell_template % _FormatDigits(

- builder_report['try_jobs_per_day'])

- html += cell_template % _FormatDigits(

- builder_report['average_regression_range_size'])

- html += cell_template % _FormatDigits(

- builder_report['average_commits_analyzed'])

- html += cell_template % _FormatSecondsAsHMS(_FormatDigits(

- builder_report['average_time_in_queue']))

- html += cell_template % _FormatSecondsAsHMS(_FormatDigits(

- builder_report['average_execution_time']))

- html += cell_template % builder_report['longest_execution_time']

- html += cell_template % builder_report['shortest_execution_time']

- html += cell_template % _FormatDigits(builder_report['detection_rate'])

- html += cell_template % builder_report['number_of_compile_try_jobs']

- html += cell_template % builder_report['number_of_test_try_jobs']

- html += '</tr></thead><tbody>'

- html += """

- </tbody>

- </table>"""

+ for try_job_type, try_job_report in report.iteritems():

+ html += '<h3>Try job type: %s</h3>' % try_job_type

+ html += """

+ <table>

+ <tr>

+ <th>Master</th>

+ <th>Builder</th>

+ <th># Try Jobs Per Day</th>

+ <th>Average Regression Range Size</th>

+ <th>Average # Revisions Analyzed</th>

+ <th>Average Time In Queue</th>

+ <th>Average Execution Time</th>

+ <th>Longest Execution Time</th>

+ <th>Shortest Execution Time</th>

+ <th>Culprit Detection Rate</th>

+ <th># Try Jobs</th>

+ </tr>"""

+ for master_name, builder_reports in try_job_report.iteritems():

+ for builder_name in builder_reports:

+ builder_report = report[try_job_type][master_name][builder_name]

+ html += '<tr>'

+ html += cell_template % master_name

+ html += cell_template % builder_name

+ html += cell_template % _FormatDigits(

+ builder_report['try_jobs_per_day'])

+ html += cell_template % _FormatDigits(

+ builder_report['average_regression_range_size'])

+ html += cell_template % _FormatDigits(

+ builder_report['average_commits_analyzed'])

+ html += cell_template % _FormatSecondsAsHMS(_FormatDigits(

+ builder_report['average_time_in_queue']))

+ html += cell_template % _FormatSecondsAsHMS(_FormatDigits(

+ builder_report['average_execution_time']))

+ html += cell_template % builder_report['longest_execution_time']

+ html += cell_template % builder_report['shortest_execution_time']

+ html += cell_template % _FormatDigits(

+ builder_report['detection_rate'])

+ html += cell_template % builder_report['number_of_try_jobs']

+ html += '</tr>'

+ html += '</table>'

return html

if __name__ == '__main__':

# Set up the Remote API to use services on the live App Engine.

remote_api.EnableRemoteApi(app_id='findit-for-me')

START_DATE = datetime.datetime(2016, 2, 1)

- END_DATE = datetime.datetime(2016, 2, 17)

+ END_DATE = datetime.datetime(2016, 3, 8)

wf_analysis_query = WfTryJobData.query(

WfTryJobData.request_time >= START_DATE,

@@ -353,9 +367,9 @@ if __name__ == '__main__':

data_list = wf_analysis_query.fetch()

masters_to_builders = FinditConfig.Get().builders_to_trybots

- categorized_data_dict = _CategorizeTryJobDataByMasterAndBuilder(data_list)

+ data = _CategorizeTryJobData(data_list)

full_report_list = _GetReportListForMastersAndBuilders(

- masters_to_builders, categorized_data_dict, START_DATE, END_DATE)

+ masters_to_builders, data, START_DATE, END_DATE)

findit_tmp_dir = os.environ.get('TMP_DIR')

if not findit_tmp_dir:

« no previous file with comments | « no previous file | no next file » | no next file with comments »