| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 """Pulls historical try job metadata from Findit and prints a report.""" |
| 6 |
| 7 import argparse |
| 8 from collections import defaultdict |
| 9 import datetime |
| 10 import os |
| 11 import sys |
| 12 |
| 13 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) |
| 14 sys.path.insert(1, _REMOTE_API_DIR) |
| 15 |
| 16 import remote_api |
| 17 |
| 18 from model.wf_config import FinditConfig |
| 19 from model.wf_try_job_data import WfTryJobData |
| 20 |
| 21 |
| 22 NOT_AVAILABLE = 'N/A' |
| 23 |
| 24 |
| 25 def _GetAverageOfNumbersInList(numbers): |
| 26 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty.""" |
| 27 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE |
| 28 |
| 29 |
| 30 def _FormatDigits(number): |
| 31 """Formats number into a 2-digit float, or NOT_AVAILABLE.""" |
| 32 if isinstance(number, float): |
| 33 return float('%.2f' % number) |
| 34 return NOT_AVAILABLE |
| 35 |
| 36 |
def _FormatSecondsAsHMS(seconds):
  """Renders a seconds count as an 'H:MM:SS' string.

  Passes NOT_AVAILABLE through unchanged so formatted report values stay
  printable even when no data was collected.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  total_minutes, secs = divmod(seconds, 60)
  hours, mins = divmod(total_minutes, 60)
  return '%d:%02d:%02d' % (hours, mins, secs)
| 45 |
| 46 |
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
        'try_jobs_per_day': The average number of jobs requested over the time
            period specified,
        'average_regression_range_size': The average number of revisions in the
            regression range when the original failure was detected,
        'average_execution_time': The average amount of time spent on each try
            job not including in-queue time.
        'average_time_in_queue': The average amount of time a try job spends
            in-queue before it is picked up.
        'average_commits_analyzed': The average number of revisions each try job
            needed to run before coming to a conclusion,
        'longest_execution_time': The length of time of the slowest try job,
        'shortest_execution_time': The length of time of the fastest try job,
        'number_of_try_jobs': The number of try jobs in this list,
        'detection_rate': The number of try jobs that found any culprits at all
            regardless of correctness over the total number of try jobs.
        'error_rate': The number of try jobs that had an error / the total
            number of try jobs in the list.
        'time_per_revision': The average amount of execution time spent on each
            revision.
        'under_five_minutes_rate': The number of try jobs that finished under 5
            minutes / total try jobs.
        'under_fifteen_minutes_rate': The number of try jobs that finished in
            under 15 minutes / total try jobs.
        'under_thirty_minutes_rate': The number of try jobs that finished in
            under 30 minutes / total try jobs.
        'over_thirty_minutes_rate': The number of try jobs that finished in over
            30 minutes / total try jobs.
    }
  """
  # Default every metric to NOT_AVAILABLE so the report stays printable even
  # when no try job data exists for the requested window.
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    # Guard against a window shorter than one full day, which would otherwise
    # raise ZeroDivisionError.
    days_in_range = (end_date - start_date).days
    if days_in_range > 0:
      try_jobs_per_day = len(try_job_data_list) / float(days_in_range)

    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time, excluding time spent in-queue.
      if try_job_data.start_time and try_job_data.end_time:
        execution_time_delta = (
            try_job_data.end_time - try_job_data.start_time)
        execution_times_seconds.append(execution_time_delta.total_seconds())

      # In-queue time, from the request until the try job is picked up.
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_time_delta = (
            try_job_data.start_time - try_job_data.request_time)
        in_queue_times.append(in_queue_time_delta.total_seconds())

      # Total time end-to-end. Measured from request_time (not start_time) so
      # in-queue time is included and the fields used match the fields checked
      # in the condition, avoiding a TypeError when start_time is None.
      if try_job_data.request_time and try_job_data.end_time:
        total_time_delta = try_job_data.end_time - try_job_data.request_time
        total_time_seconds = total_time_delta.total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection rate.
      if try_job_data.culprits:
        culprits_detected += 1

      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already returns NOT_AVAILABLE for an empty
    # list, so no extra emptiness checks are needed for the averages.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    time_per_revision = (average_execution_time / average_commits_analyzed if (
        average_execution_time != NOT_AVAILABLE and
        average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
| 213 |
| 214 |
| 215 def PrintCommonStats(try_job_data_list, start_date, end_date, indent): |
| 216 """Takes a list of WfTryJobData entities and prints their stats.""" |
| 217 spaces = '' |
| 218 for _ in range(indent): |
| 219 spaces += ' ' |
| 220 |
| 221 report_info = _GetReportInformation(try_job_data_list, start_date, end_date) |
| 222 for key, value in report_info.iteritems(): |
| 223 print '%s%s: %s' % (spaces, key, value) |
| 224 |
| 225 |
| 226 def PrettyPrint(grouped_data, start_date, end_date, indent=0): |
| 227 if not grouped_data: |
| 228 return |
| 229 if isinstance(grouped_data, list): |
| 230 # Print the stats about the list. |
| 231 PrintCommonStats(grouped_data, start_date, end_date, indent) |
| 232 elif isinstance(grouped_data, dict): |
| 233 spaces = '' |
| 234 for _ in range(indent): |
| 235 spaces += ' ' |
| 236 |
| 237 for field, data in grouped_data.iteritems(): |
| 238 print spaces + field |
| 239 PrettyPrint(data, start_date, end_date, indent + 2) |
| 240 else: |
| 241 raise Exception('grouped_data dict should only contain dicts or lists.') |
| 242 |
| 243 |
| 244 def _SplitListByTryJobType(try_job_data_list): |
| 245 categorized_data_dict = { |
| 246 'compile': [], |
| 247 'test': [] |
| 248 } |
| 249 for try_job_data in try_job_data_list: |
| 250 if try_job_data.try_job_type.lower() == 'compile': |
| 251 categorized_data_dict['compile'].append(try_job_data) |
| 252 elif try_job_data.try_job_type.lower() == 'test': |
| 253 categorized_data_dict['test'].append(try_job_data) |
| 254 |
| 255 return categorized_data_dict |
| 256 |
| 257 |
| 258 def _SplitListByMaster(try_job_data_list): |
| 259 categorized_data_dict = defaultdict(list) |
| 260 |
| 261 for try_job_data in try_job_data_list: |
| 262 master_name = try_job_data.master_name |
| 263 |
| 264 if not master_name: |
| 265 continue |
| 266 |
| 267 categorized_data_dict[master_name].append(try_job_data) |
| 268 |
| 269 return categorized_data_dict |
| 270 |
| 271 |
| 272 def _SplitListByBuilder(try_job_data_list): |
| 273 categorized_data_dict = defaultdict(list) |
| 274 |
| 275 for try_job_data in try_job_data_list: |
| 276 builder_name = try_job_data.builder_name |
| 277 |
| 278 if not builder_name: |
| 279 continue |
| 280 |
| 281 categorized_data_dict[builder_name].append(try_job_data) |
| 282 |
| 283 return categorized_data_dict |
| 284 |
| 285 |
| 286 def _SplitListByHeuristicResults(try_job_data_list): |
| 287 categorized_data_dict = { |
| 288 'with heuristic guidance': [], |
| 289 'without heuristic guidance': [] |
| 290 } |
| 291 for try_job_data in try_job_data_list: |
| 292 if try_job_data.has_heuristic_results: |
| 293 categorized_data_dict['with heuristic guidance'].append(try_job_data) |
| 294 else: |
| 295 categorized_data_dict['without heuristic guidance'].append(try_job_data) |
| 296 return categorized_data_dict |
| 297 |
| 298 |
| 299 def _SplitListByCompileTargets(try_job_data_list): |
| 300 categorized_data_dict = { |
| 301 'with compile targets': [], |
| 302 'without compile targets': [] |
| 303 } |
| 304 for try_job_data in try_job_data_list: |
| 305 if try_job_data.has_compile_targets: |
| 306 categorized_data_dict['with compile targets'].append(try_job_data) |
| 307 else: |
| 308 categorized_data_dict['without compile targets'].append(try_job_data) |
| 309 return categorized_data_dict |
| 310 |
| 311 |
def SplitListByOption(try_job_data_list, option):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    option: An option with which to split the data by.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data.
  """
  splitters = {
      't': _SplitListByTryJobType,  # Try job type.
      'm': _SplitListByMaster,  # Main waterfall master.
      'b': _SplitListByBuilder,  # Main waterfall builder.
      'r': _SplitListByHeuristicResults,  # Heuristic results included or not.
      'c': _SplitListByCompileTargets,  # Compile targets included or not.
  }
  # TODO(lijeffrey): Add support for splitting by platform.

  splitter = splitters.get(option)
  if splitter is None:
    # Unsupported flag, bail out without modification.
    return try_job_data_list
  return splitter(try_job_data_list)
| 338 |
| 339 |
def SplitStructByOption(try_job_data_struct, option):
  """Recursively applies SplitListByOption to every list in the structure."""
  if isinstance(try_job_data_struct, list):
    return SplitListByOption(try_job_data_struct, option)

  if isinstance(try_job_data_struct, dict):
    # Replace each value in place; keys are unchanged, so mutating while
    # iterating over a key snapshot is safe.
    for key in try_job_data_struct.keys():
      try_job_data_struct[key] = SplitStructByOption(
          try_job_data_struct[key], option)
    return try_job_data_struct

  raise Exception('try job data dict must only contain lists or dicts.')
| 350 |
| 351 |
def GetArgsInOrder():
  """Parses the supported grouping flags, preserving command-line order.

  argparse does not record the order flags appeared in, so after parsing we
  walk the raw argv to reconstruct it.

  Returns:
    A list of flag names (without the leading '-') in the order given.
  """
  raw_args = sys.argv[1:]

  parser = argparse.ArgumentParser()
  flag_definitions = [
      ('-t', 'group try job data by type (compile, test)'),
      ('-m', 'group try job data by master'),
      ('-b', 'group try job data by builder'),
      ('-r', ('group try job data by those with and without '
              'heuristic results')),
      ('-c', ('group try job data by those with and without '
              'compile targets')),
  ]
  for flag, help_text in flag_definitions:
    parser.add_argument(flag, action='store_true', help=help_text)

  args_dict = vars(parser.parse_args())

  ordered_args = []
  for original_arg in raw_args:
    parsed_arg = original_arg[1:]
    if args_dict[parsed_arg]:
      ordered_args.append(parsed_arg)

  return ordered_args
| 380 |
| 381 |
if __name__ == '__main__':
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Reporting window: only try jobs requested in [START_DATE, END_DATE) are
  # included. NOTE(review): these bounds are hard-coded; update them (or
  # parameterize the script) when running a report for a different period.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 17)

  # Fetch all try job metadata whose request time falls inside the window.
  try_job_data_query = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE)
  categorized_data = try_job_data_query.fetch()

  # Apply each grouping flag in the order it was given on the command line,
  # nesting the data one level deeper per flag.
  args = GetArgsInOrder()
  for arg in args:
    categorized_data = SplitStructByOption(categorized_data, arg)

  # TODO(lijeffrey): Display data in an html page instead of printing.
  PrettyPrint(categorized_data, START_DATE, END_DATE)
| OLD | NEW |