Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Pulls historical try job metadata from Findit and prints a report.""" | |
| 6 | |
| 7 from collections import defaultdict | |
| 8 import datetime | |
| 9 import os | |
| 10 import sys | |
| 11 | |
| 12 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) | |
| 13 sys.path.insert(1, _REMOTE_API_DIR) | |
| 14 | |
| 15 import remote_api | |
| 16 | |
| 17 from model.wf_config import FinditConfig | |
| 18 from model.wf_try_job_data import WfTryJobData | |
| 19 | |
| 20 | |
| 21 NOT_AVAILABLE = 'N/A' | |
| 22 | |
| 23 | |
| 24 def _GetAverageOfNumbersInList(numbers): | |
| 25 """Returns a float average of numbers or NOT_AVAILABLE if numbers is empty.""" | |
| 26 return (float(sum(numbers)) / len(numbers)) if numbers else NOT_AVAILABLE | |
| 27 | |
| 28 | |
| 29 def _FormatDigits(number): | |
| 30 """Formats number into a 2-digit float, or NOT_AVAILABLE.""" | |
| 31 if isinstance(number, float): | |
| 32 return float('%.2f' % number) | |
| 33 return NOT_AVAILABLE | |
| 34 | |
| 35 | |
def _FormatSecondsAsHMS(seconds):
  """Converts a second count into an 'H:MM:SS' string.

  The NOT_AVAILABLE sentinel is passed through unchanged so that metrics
  which could not be computed still render sensibly in the report.
  """
  if seconds == NOT_AVAILABLE:
    return NOT_AVAILABLE

  hours, remainder = divmod(seconds, 3600)
  minutes, secs = divmod(remainder, 60)
  return '%d:%02d:%02d' % (hours, minutes, secs)
| 44 | |
| 45 | |
def _GetReportInformation(try_job_data_list, start_date, end_date):
  """Computes and returns try job metadata.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    start_date: The earliest request date to compute data.
    end_date: The latest request date to compute data.

  Returns:
    A dict in the following format:
    {
      'try_jobs_per_day': The average number of jobs requested over the time
        period specified,
      'average_regression_range_size': The average number of revisions in the
        regression range when the original failure was detected,
      'average_execution_time': The average amount of time spent on each try
        job not including in-queue time.
      'average_time_in_queue': The average amount of time a try job spends
        in-queue before it is picked up.
      'average_commits_analyzed': The average number of revisions each try job
        needed to run before coming to a conclusion,
      'longest_execution_time': The length of time of the slowest try job,
      'shortest_execution_time': The length of time of the fastest try job,
      'number_of_try_jobs': The number of try jobs in this list,
      'detection_rate': The number of try jobs that found any culprits at all
        regardless of correctness over the total number of try jobs.
      'error_rate': The number of try jobs that had an error / the total
        number of try jobs in the list.
      'time_per_revision': The average amount of execution time spent on each
        revision.
      'under_five_minutes_rate': The number of try jobs that finished under 5
        minutes / total try jobs.
      'under_fifteen_minutes_rate': The number of try jobs that finished in
        under 15 minutes / total try jobs.
      'under_thirty_minutes_rate': The number of try jobs that finished in
        under 30 minutes / total try jobs.
      'over_thirty_minutes_rate': The number of try jobs that finished in over
        30 minutes / total try jobs.
    }
  """
  number_of_try_jobs = len(try_job_data_list) if try_job_data_list else 0

  # Every metric defaults to NOT_AVAILABLE and is only filled in below when
  # there is data to compute it from.
  try_jobs_per_day = NOT_AVAILABLE
  average_regression_range_size = NOT_AVAILABLE
  average_execution_time = NOT_AVAILABLE
  average_time_in_queue = NOT_AVAILABLE
  average_commits_analyzed = NOT_AVAILABLE
  longest_execution_time = NOT_AVAILABLE
  shortest_execution_time = NOT_AVAILABLE
  detection_rate = NOT_AVAILABLE
  error_rate = NOT_AVAILABLE
  time_per_revision = NOT_AVAILABLE
  under_five_minutes_rate = NOT_AVAILABLE
  under_fifteen_minutes_rate = NOT_AVAILABLE
  under_thirty_minutes_rate = NOT_AVAILABLE
  over_thirty_minutes_rate = NOT_AVAILABLE

  if try_job_data_list:
    # Guard against a zero-length reporting window (start and end on the same
    # day) rather than dividing by zero.
    days = float((end_date - start_date).days)
    try_jobs_per_day = (
        len(try_job_data_list) / days if days else NOT_AVAILABLE)

    regression_range_sizes = []
    execution_times_seconds = []
    in_queue_times = []
    commits_analyzed = []
    culprits_detected = 0
    errors_detected = 0
    number_under_five_minutes = 0
    number_under_fifteen_minutes = 0
    number_under_thirty_minutes = 0
    number_over_thirty_minutes = 0
    total_number_of_try_jobs = len(try_job_data_list)

    for try_job_data in try_job_data_list:
      # Regression range size.
      if try_job_data.regression_range_size:
        regression_range_sizes.append(try_job_data.regression_range_size)

      # Execution time (start to end; excludes in-queue time).
      if try_job_data.start_time and try_job_data.end_time:
        execution_times_seconds.append(
            (try_job_data.end_time - try_job_data.start_time).total_seconds())

      # In-queue time (request to start).
      if try_job_data.start_time and try_job_data.request_time:
        in_queue_times.append(
            (try_job_data.start_time -
             try_job_data.request_time).total_seconds())

      # Total time end-to-end (request to end).
      if try_job_data.request_time and try_job_data.end_time:
        # Bug fix: this previously computed end_time - start_time, which
        # measured the wrong interval and could crash when start_time was
        # unset even though only request_time/end_time were checked above.
        total_time_seconds = (
            try_job_data.end_time - try_job_data.request_time).total_seconds()

        if total_time_seconds < 300:  # Under 5 minutes.
          number_under_five_minutes += 1
        elif total_time_seconds < 900:  # Under 15 minutes.
          number_under_fifteen_minutes += 1
        elif total_time_seconds < 1800:  # Under 30 minutes.
          number_under_thirty_minutes += 1
        else:  # Over 30 minutes.
          number_over_thirty_minutes += 1

      # Number of commits analyzed.
      if try_job_data.number_of_commits_analyzed:
        commits_analyzed.append(try_job_data.number_of_commits_analyzed)

      # Culprit detection and error counts.
      if try_job_data.culprits:
        culprits_detected += 1
      if try_job_data.error:
        errors_detected += 1

    # _GetAverageOfNumbersInList already yields NOT_AVAILABLE for an empty
    # list, so no additional emptiness guards are needed here.
    average_regression_range_size = _GetAverageOfNumbersInList(
        regression_range_sizes)
    average_execution_time = _GetAverageOfNumbersInList(
        execution_times_seconds)
    average_time_in_queue = _GetAverageOfNumbersInList(in_queue_times)
    average_commits_analyzed = _GetAverageOfNumbersInList(commits_analyzed)
    longest_execution_time = (
        str(datetime.timedelta(seconds=max(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    shortest_execution_time = (
        str(datetime.timedelta(seconds=min(execution_times_seconds)))
        if execution_times_seconds else NOT_AVAILABLE)
    detection_rate = float(culprits_detected) / total_number_of_try_jobs
    error_rate = float(errors_detected) / total_number_of_try_jobs
    time_per_revision = (
        average_execution_time / average_commits_analyzed
        if (average_execution_time != NOT_AVAILABLE and
            average_commits_analyzed != NOT_AVAILABLE) else NOT_AVAILABLE)

    under_five_minutes_rate = (
        float(number_under_five_minutes) / total_number_of_try_jobs)
    under_fifteen_minutes_rate = (
        float(number_under_fifteen_minutes) / total_number_of_try_jobs)
    under_thirty_minutes_rate = (
        float(number_under_thirty_minutes) / total_number_of_try_jobs)
    over_thirty_minutes_rate = (
        float(number_over_thirty_minutes) / total_number_of_try_jobs)

  return {
      'try_jobs_per_day': _FormatDigits(try_jobs_per_day),
      'average_regression_range_size': _FormatDigits(
          average_regression_range_size),
      'average_execution_time': _FormatSecondsAsHMS(
          _FormatDigits(average_execution_time)),
      'average_time_in_queue': _FormatSecondsAsHMS(
          _FormatDigits(average_time_in_queue)),
      'average_commits_analyzed': _FormatDigits(average_commits_analyzed),
      'longest_execution_time': longest_execution_time,
      'shortest_execution_time': shortest_execution_time,
      'number_of_try_jobs': number_of_try_jobs,
      'detection_rate': _FormatDigits(detection_rate),
      'error_rate': _FormatDigits(error_rate),
      'time_per_revision': _FormatSecondsAsHMS(
          _FormatDigits(time_per_revision)),
      'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate),
      'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate),
      'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate),
      'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate)
  }
| 212 | |
| 213 | |
| 214 def PrintCommonStats(try_job_data_list, start_date, end_date, indent): | |
| 215 """Takes a list of WfTryJobData entities and prints their stats.""" | |
| 216 spaces = '' | |
| 217 for _ in range(indent): | |
| 218 spaces += ' ' | |
| 219 | |
| 220 report_info = _GetReportInformation(try_job_data_list, start_date, end_date) | |
| 221 for key, value in report_info.iteritems(): | |
| 222 print '%s%s: %s' % (spaces, key, value) | |
| 223 | |
| 224 | |
| 225 def PrettyPrint(grouped_data, start_date, end_date, indent=0): | |
| 226 if not grouped_data: | |
| 227 return | |
| 228 if isinstance(grouped_data, list): | |
| 229 # Print the stats about the list. | |
| 230 PrintCommonStats(grouped_data, start_date, end_date, indent) | |
| 231 elif isinstance(grouped_data, dict): | |
| 232 spaces = '' | |
| 233 for _ in range(indent): | |
| 234 spaces += ' ' | |
| 235 | |
| 236 for field, data in grouped_data.iteritems(): | |
| 237 print spaces + field | |
| 238 PrettyPrint(data, start_date, end_date, indent + 2) | |
| 239 else: | |
| 240 raise Exception('grouped_data dict should only contain dicts or lists.') | |
| 241 | |
| 242 | |
| 243 def _SplitListByTryJobType(try_job_data_list): | |
| 244 categorized_data_dict = { | |
| 245 'compile': [], | |
| 246 'test': [] | |
| 247 } | |
| 248 for try_job_data in try_job_data_list: | |
| 249 if try_job_data.try_job_type.lower() == 'compile': | |
| 250 categorized_data_dict['compile'].append(try_job_data) | |
| 251 elif try_job_data.try_job_type.lower() == 'test': | |
| 252 categorized_data_dict['test'].append(try_job_data) | |
| 253 | |
| 254 return categorized_data_dict | |
| 255 | |
| 256 | |
| 257 def _SplitListByMaster(try_job_data_list): | |
| 258 categorized_data_dict = defaultdict(list) | |
| 259 | |
| 260 for try_job_data in try_job_data_list: | |
| 261 master_name = try_job_data.master_name | |
| 262 | |
| 263 if not master_name: | |
| 264 continue | |
| 265 | |
| 266 categorized_data_dict[master_name].append(try_job_data) | |
| 267 | |
| 268 return categorized_data_dict | |
| 269 | |
| 270 | |
| 271 def _SplitListByBuilder(try_job_data_list): | |
| 272 categorized_data_dict = defaultdict(list) | |
| 273 | |
| 274 for try_job_data in try_job_data_list: | |
| 275 builder_name = try_job_data.builder_name | |
| 276 | |
| 277 if not builder_name: | |
| 278 continue | |
| 279 | |
| 280 categorized_data_dict[builder_name].append(try_job_data) | |
| 281 | |
| 282 return categorized_data_dict | |
| 283 | |
| 284 | |
| 285 def _SplitListByHeuristicResults(try_job_data_list): | |
| 286 categorized_data_dict = { | |
| 287 'With heuristic guidance': [], | |
| 288 'Without heuristic guidance': [] | |
| 289 } | |
| 290 for try_job_data in try_job_data_list: | |
| 291 if try_job_data.has_heuristic_results: | |
| 292 categorized_data_dict['With heuristic guidance'].append(try_job_data) | |
| 293 else: | |
| 294 categorized_data_dict['Without heuristic guidance'].append(try_job_data) | |
| 295 return categorized_data_dict | |
| 296 | |
| 297 | |
| 298 def _SplitListByCompileTargets(try_job_data_list): | |
| 299 categorized_data_dict = { | |
| 300 'With compile targets': [], | |
| 301 'Without compile targets': [] | |
| 302 } | |
| 303 for try_job_data in try_job_data_list: | |
| 304 if try_job_data.has_compile_targets: | |
| 305 categorized_data_dict['With compile targets'].append(try_job_data) | |
| 306 else: | |
| 307 categorized_data_dict['Without compile targets'].append(try_job_data) | |
| 308 return categorized_data_dict | |
| 309 | |
| 310 | |
def SplitListByArg(try_job_data_list, arg):
  """Takes a WfTryJobData list and separates it into a dict based on arg.

  Args:
    try_job_data_list: A list of WfTryJobData entities.
    arg: An argument with which to split the data by. Options include:
      --type: Split by try job type ('compile', 'test').
      --per-master: Split by WfTryJobData.master_name.
      --per-builder: Split by WfTryJobData.builder_name, nested under master.
      --has-heuristic-results: Split by whether or not heuristic results
        were passed to the try job.
      --has-compile-targets: Split by whether or not compile targets were
        passed to the try job. Note this field is not meaningful for test
        try jobs.

  Returns:
    A dict where the keys are how the data is separated based on arg and the
    values are the corresponding lists of data. An unsupported arg returns
    try_job_data_list unchanged.
  """
  if arg == '--type':
    return _SplitListByTryJobType(try_job_data_list)

  if arg == '--per-master':
    return _SplitListByMaster(try_job_data_list)

  if arg == '--per-builder':
    # Builders are tied to masters, so group by master first, then split
    # each master's list by builder.
    by_master = _SplitListByMaster(try_job_data_list)
    for master in by_master.keys():
      by_master[master] = _SplitListByBuilder(by_master[master])
    return by_master

  if arg == '--has-heuristic-results':
    return _SplitListByHeuristicResults(try_job_data_list)

  if arg == '--has-compile-targets':
    return _SplitListByCompileTargets(try_job_data_list)

  # TODO(lijeffrey): Add support for splitting by platform.

  # Unsupported flag, bail out without modification.
  return try_job_data_list
| 351 | |
| 352 | |
def SplitStructByArg(try_job_data_struct, arg):
  """Recursively splits every list inside try_job_data_struct by arg.

  Dicts are walked in place; each leaf list is replaced with the result of
  SplitListByArg. Raises Exception for any other node type.
  """
  if isinstance(try_job_data_struct, dict):
    # Snapshot the keys so values can be replaced while walking.
    for key in try_job_data_struct.keys():
      try_job_data_struct[key] = SplitStructByArg(
          try_job_data_struct[key], arg)
    return try_job_data_struct

  if isinstance(try_job_data_struct, list):
    return SplitListByArg(try_job_data_struct, arg)

  raise Exception('try job data dict must only contain lists or dicts.')
| 363 | |
| 364 | |
def GetArgs():
  """Returns the command-line flags used to split the report data.

  '--per-builder' already groups by master first, so '--per-master' is
  dropped when both are present to avoid grouping by master twice.
  """
  args = sys.argv[1:]

  if '--per-builder' in args and '--per-master' in args:
    args.remove('--per-master')

  return args
| 375 | |
| 376 | |
if __name__ == '__main__':
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Reporting window: try jobs requested in [START_DATE, END_DATE).
  # NOTE(review): these dates are hard-coded; update them (or add flags)
  # to report on a different period.
  START_DATE = datetime.datetime(2016, 5, 1)
  END_DATE = datetime.datetime(2016, 6, 8)

  # Fetch all try job data entities requested within the window.
  try_job_data_query = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE)
  categorized_data = try_job_data_query.fetch()

  args = GetArgs()

  # Each flag further subdivides the data, nesting one level per flag.
  for arg in args:
    categorized_data = SplitStructByArg(categorized_data, arg)

  PrettyPrint(categorized_data, START_DATE, END_DATE)
| OLD | NEW |