Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Pulls historical try job metadata from Findit and prints a report.""" | 5 """Pulls historical try job metadata from Findit and prints a report.""" |
| 6 | 6 |
| 7 import argparse | 7 import argparse |
| 8 from collections import defaultdict | 8 from collections import defaultdict |
| 9 import datetime | 9 import datetime |
| 10 import json | 10 import json |
| 11 import numpy | 11 import numpy |
| 12 import os | 12 import os |
| 13 import sys | 13 import sys |
| 14 | 14 |
| 15 try: | |
| 16 from matplotlib import pyplot | |
| 17 except ImportError: | |
| 18 pyplot = None | |
| 19 | |
| 15 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) | 20 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) |
| 16 sys.path.insert(1, _REMOTE_API_DIR) | 21 sys.path.insert(1, _REMOTE_API_DIR) |
| 17 | 22 |
| 18 import remote_api | 23 import remote_api |
| 19 | 24 |
| 20 from model.wf_try_job_data import WfTryJobData | 25 from model.wf_try_job_data import WfTryJobData |
| 21 | 26 |
| 22 | 27 |
| 23 NOT_AVAILABLE = 'N/A' | 28 NOT_AVAILABLE = 'N/A' |
| 24 | 29 |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
def _FormatSecondsAsHMS(seconds):
  """Formats the number of seconds into hours, minutes, seconds."""
  if seconds == NOT_AVAILABLE:
    # Pass the 'N/A' sentinel straight through for missing metrics.
    return NOT_AVAILABLE

  hours, remainder = divmod(seconds, 3600)
  minutes, secs = divmod(remainder, 60)
  return '%d:%02d:%02d' % (hours, minutes, secs)
| 96 | 101 |
| 97 | 102 |
def _GetRequestSpikes(request_times, time_window_seconds=30*60,
                      minimum_spike_size=3, show_plot=False):
  """Calculates and plots try jobs by request time.

  A "spike" is a run of at least |minimum_spike_size| requests all falling
  within |time_window_seconds| of the run's first request.

  Args:
    request_times: List of datetime objects representing try job request times.
    time_window_seconds: Maximum number of seconds between requests to count
      as a spike.
    minimum_spike_size: Minimum number of requests within the specified time
      window needed to count as a spike.
    show_plot: Boolean whether to display visual graphs of the request times.

  Returns:
    spike_count: The number of spikes found.
    average_spike_size: The average number of requests in each spike.
    maximum_spike_size: The number of requests in the biggest spike.
  """
  request_times = sorted(request_times)

  if show_plot:
    if pyplot:
      pyplot.plot(request_times, [i for i in range(len(request_times))], 'x')
      pyplot.show()
    else:
      print ('In order to show plots, matplotlib needs to be installed. To '
             'install, please run \'sudo pip install matplotlib\'')

  spike_sizes = []

  # Guard against an empty input list, which previously raised an IndexError
  # on request_times[0].
  if request_times:
    candidate_spike_start = request_times[0]
    points_in_spike = 1

    for point_being_examined in request_times[1:]:
      if ((point_being_examined - candidate_spike_start).total_seconds() <
          time_window_seconds):
        points_in_spike += 1
      else:
        # The time window has passed. Record the completed window if it was
        # large enough, then start a new candidate window.
        if points_in_spike >= minimum_spike_size:
          spike_sizes.append(points_in_spike)

        candidate_spike_start = point_being_examined
        points_in_spike = 1  # Start over.

    # Bug fix: a spike still in progress when the data ends was previously
    # dropped, because spikes were only recorded when a later request fell
    # outside the window. Count the final window too.
    if points_in_spike >= minimum_spike_size:
      spike_sizes.append(points_in_spike)

  # spike_count is exactly the number of recorded windows.
  return (len(spike_sizes), _GetAverageOfNumbersInList(spike_sizes),
          max(spike_sizes) if spike_sizes else 0)
| 150 | |
| 151 | |
| 98 def _GetReportInformation(try_job_data_list, start_date, end_date): | 152 def _GetReportInformation(try_job_data_list, start_date, end_date): |
| 99 """Computes and returns try job metadata. | 153 """Computes and returns try job metadata. |
| 100 | 154 |
| 101 Args: | 155 Args: |
| 102 try_job_data_list: A list of WfTryJobData entities. | 156 try_job_data_list: A list of WfTryJobData entities. |
| 103 start_date: The earliest request date to compute data. | 157 start_date: The earliest request date to compute data. |
| 104 end_date: The latest request date to compute data. | 158 end_date: The latest request date to compute data. |
| 105 | 159 |
| 106 Returns: | 160 Returns: |
| 107 A dict in the following format: | 161 A dict in the following format: |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 155 under_five_minutes_rate = NOT_AVAILABLE | 209 under_five_minutes_rate = NOT_AVAILABLE |
| 156 under_fifteen_minutes_rate = NOT_AVAILABLE | 210 under_fifteen_minutes_rate = NOT_AVAILABLE |
| 157 under_thirty_minutes_rate = NOT_AVAILABLE | 211 under_thirty_minutes_rate = NOT_AVAILABLE |
| 158 over_thirty_minutes_rate = NOT_AVAILABLE | 212 over_thirty_minutes_rate = NOT_AVAILABLE |
| 159 | 213 |
| 160 if try_job_data_list: | 214 if try_job_data_list: |
| 161 try_jobs_per_day = ( | 215 try_jobs_per_day = ( |
| 162 len(try_job_data_list) / float((end_date - start_date).days)) | 216 len(try_job_data_list) / float((end_date - start_date).days)) |
| 163 regression_range_sizes = [] | 217 regression_range_sizes = [] |
| 164 execution_times_seconds = [] | 218 execution_times_seconds = [] |
| 219 request_times = [] | |
| 165 in_queue_times = [] | 220 in_queue_times = [] |
| 166 end_to_end_times = [] | 221 end_to_end_times = [] |
| 167 commits_analyzed = [] | 222 commits_analyzed = [] |
| 168 culprits_detected = 0 | 223 culprits_detected = 0 |
| 169 errors_detected = 0 | 224 errors_detected = 0 |
| 170 number_under_five_minutes = 0 | 225 number_under_five_minutes = 0 |
| 171 number_under_fifteen_minutes = 0 | 226 number_under_fifteen_minutes = 0 |
| 172 number_under_thirty_minutes = 0 | 227 number_under_thirty_minutes = 0 |
| 173 number_over_thirty_minutes = 0 | 228 number_over_thirty_minutes = 0 |
| 174 total_number_of_try_jobs = len(try_job_data_list) | 229 total_number_of_try_jobs = len(try_job_data_list) |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 186 execution_times_seconds.append(execution_time) | 241 execution_times_seconds.append(execution_time) |
| 187 | 242 |
| 188 # In-queue time. | 243 # In-queue time. |
| 189 if try_job_data.start_time and try_job_data.request_time: | 244 if try_job_data.start_time and try_job_data.request_time: |
| 190 in_queue_time_delta = ( | 245 in_queue_time_delta = ( |
| 191 try_job_data.start_time - try_job_data.request_time) | 246 try_job_data.start_time - try_job_data.request_time) |
| 192 in_queue_time = in_queue_time_delta.total_seconds() | 247 in_queue_time = in_queue_time_delta.total_seconds() |
| 193 in_queue_times.append(in_queue_time) | 248 in_queue_times.append(in_queue_time) |
| 194 | 249 |
| 195 # Total time end-to-end. | 250 # Total time end-to-end. |
| 196 if try_job_data.request_time and try_job_data.end_time: | 251 if try_job_data.request_time: |
| 197 total_time_delta = try_job_data.end_time - try_job_data.start_time | 252 request_times.append(try_job_data.request_time) |
| 198 total_time_seconds = total_time_delta.total_seconds() | |
| 199 end_to_end_times.append(total_time_seconds) | |
| 200 | 253 |
| 201 if total_time_seconds < 300: # Under 5 minutes. | 254 if try_job_data.end_time: |
| 202 number_under_five_minutes += 1 | 255 total_time_delta = try_job_data.end_time - try_job_data.start_time |
| 203 elif total_time_seconds < 900: # Under 15 minutes. | 256 total_time_seconds = total_time_delta.total_seconds() |
| 204 number_under_fifteen_minutes += 1 | 257 end_to_end_times.append(total_time_seconds) |
| 205 elif total_time_seconds < 1800: # Under 30 minutes. | 258 |
| 206 number_under_thirty_minutes += 1 | 259 if total_time_seconds < 300: # Under 5 minutes. |
| 207 else: # Over 30 minutes. | 260 number_under_five_minutes += 1 |
| 208 number_over_thirty_minutes += 1 | 261 elif total_time_seconds < 900: # Under 15 minutes. |
| 262 number_under_fifteen_minutes += 1 | |
| 263 elif total_time_seconds < 1800: # Under 30 minutes. | |
| 264 number_under_thirty_minutes += 1 | |
| 265 else: # Over 30 minutes. | |
| 266 number_over_thirty_minutes += 1 | |
| 209 | 267 |
| 210 # Number of commits analyzed. | 268 # Number of commits analyzed. |
| 211 if try_job_data.number_of_commits_analyzed: | 269 if try_job_data.number_of_commits_analyzed: |
| 212 commits_analyzed.append(try_job_data.number_of_commits_analyzed) | 270 commits_analyzed.append(try_job_data.number_of_commits_analyzed) |
| 213 | 271 |
| 214 # Culprit detection rate. | 272 # Culprit detection rate. |
| 215 if try_job_data.culprits: | 273 if try_job_data.culprits: |
| 216 culprits_detected += 1 | 274 culprits_detected += 1 |
| 217 | 275 |
| 218 if try_job_data.error: | 276 if try_job_data.error: |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 258 | 316 |
| 259 under_five_minutes_rate = ( | 317 under_five_minutes_rate = ( |
| 260 float(number_under_five_minutes) / total_number_of_try_jobs) | 318 float(number_under_five_minutes) / total_number_of_try_jobs) |
| 261 under_fifteen_minutes_rate = ( | 319 under_fifteen_minutes_rate = ( |
| 262 float(number_under_fifteen_minutes) / total_number_of_try_jobs) | 320 float(number_under_fifteen_minutes) / total_number_of_try_jobs) |
| 263 under_thirty_minutes_rate = ( | 321 under_thirty_minutes_rate = ( |
| 264 float(number_under_thirty_minutes) / total_number_of_try_jobs) | 322 float(number_under_thirty_minutes) / total_number_of_try_jobs) |
| 265 over_thirty_minutes_rate = ( | 323 over_thirty_minutes_rate = ( |
| 266 float(number_over_thirty_minutes) / total_number_of_try_jobs) | 324 float(number_over_thirty_minutes) / total_number_of_try_jobs) |
| 267 | 325 |
| 326 # Calculate try job spikes. | |
| 327 spike_count, average_spike_size, maximum_spike_size = _GetRequestSpikes( | |
| 328 request_times, time_window_seconds=30*60, minimum_spike_size=3, | |
| 329 show_plot=False) | |
| 330 | |
| 268 return { | 331 return { |
| 269 'try_jobs_per_day': _FormatDigits(try_jobs_per_day), | 332 'try_jobs_per_day': _FormatDigits(try_jobs_per_day), |
| 270 'average_regression_range_size': _FormatDigits( | 333 'average_regression_range_size': _FormatDigits( |
| 271 average_regression_range_size), | 334 average_regression_range_size), |
| 272 'median_regression_range_size': median_regression_range_size, | 335 'median_regression_range_size': median_regression_range_size, |
| 273 'average_execution_time': _FormatSecondsAsHMS(_FormatDigits( | 336 'average_execution_time': _FormatSecondsAsHMS(_FormatDigits( |
| 274 average_execution_time)), | 337 average_execution_time)), |
| 275 'median_execution_time': _FormatSecondsAsHMS(_FormatDigits( | 338 'median_execution_time': _FormatSecondsAsHMS(_FormatDigits( |
| 276 median_execution_time)), | 339 median_execution_time)), |
| 277 'average_end_to_end_time': _FormatSecondsAsHMS(_FormatDigits( | 340 'average_end_to_end_time': _FormatSecondsAsHMS(_FormatDigits( |
| 278 average_end_to_end_time)), | 341 average_end_to_end_time)), |
| 279 'median_end_to_end_time': _FormatSecondsAsHMS(_FormatDigits( | 342 'median_end_to_end_time': _FormatSecondsAsHMS(_FormatDigits( |
| 280 median_end_to_end_time)), | 343 median_end_to_end_time)), |
| 281 'average_time_in_queue': _FormatSecondsAsHMS( | 344 'average_time_in_queue': _FormatSecondsAsHMS( |
| 282 _FormatDigits(average_time_in_queue)), | 345 _FormatDigits(average_time_in_queue)), |
| 283 'median_time_in_queue': _FormatSecondsAsHMS(_FormatDigits( | 346 'median_time_in_queue': _FormatSecondsAsHMS(_FormatDigits( |
| 284 median_time_in_queue)), | 347 median_time_in_queue)), |
| 285 'average_commits_analyzed': _FormatDigits(average_commits_analyzed), | 348 'average_commits_analyzed': _FormatDigits(average_commits_analyzed), |
| 286 'median_commits_analyzed': median_commits_analyzed, | 349 'median_commits_analyzed': median_commits_analyzed, |
| 287 'longest_execution_time': longest_execution_time, | 350 'longest_execution_time': longest_execution_time, |
| 288 'shortest_execution_time': shortest_execution_time, | 351 'shortest_execution_time': shortest_execution_time, |
| 289 'number_of_try_jobs': number_of_try_jobs, | 352 'number_of_try_jobs': number_of_try_jobs, |
| 290 'detection_rate': _FormatDigits(detection_rate), | 353 'detection_rate': _FormatDigits(detection_rate), |
| 291 'error_rate': _FormatDigits(error_rate), | 354 'error_rate': _FormatDigits(error_rate), |
| 292 'time_per_revision': _FormatSecondsAsHMS( | 355 'time_per_revision': _FormatSecondsAsHMS( |
| 293 _FormatDigits(time_per_revision)), | 356 _FormatDigits(time_per_revision)), |
| 294 'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate), | 357 'under_five_minutes_rate': _FormatDigits(under_five_minutes_rate), |
| 295 'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate), | 358 'under_fifteen_minutes_rate': _FormatDigits(under_fifteen_minutes_rate), |
| 296 'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate), | 359 'under_thirty_minutes_rate': _FormatDigits(under_thirty_minutes_rate), |
| 297 'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate) | 360 'over_thirty_minutes_rate': _FormatDigits(over_thirty_minutes_rate), |
| 361 'request_spike_count': spike_count, | |
| 362 'request_spike_average_size': average_spike_size, | |
| 363 'request_spike_maximum_size': maximum_spike_size, | |
| 298 } | 364 } |
| 299 | 365 |
| 300 | 366 |
| 301 def PrintCommonStats(try_job_data_list, start_date, end_date, indent): | 367 def PrintCommonStats(try_job_data_list, start_date, end_date, indent): |
| 302 """Takes a list of WfTryJobData entities and prints their stats.""" | 368 """Takes a list of WfTryJobData entities and prints their stats.""" |
| 303 spaces = '' | 369 spaces = '' |
| 304 for _ in range(indent): | 370 for _ in range(indent): |
| 305 spaces += ' ' | 371 spaces += ' ' |
| 306 | 372 |
| 307 report_info = _GetReportInformation(try_job_data_list, start_date, end_date) | 373 report_info = _GetReportInformation(try_job_data_list, start_date, end_date) |
| (...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 516 if args_dict[parsed_arg]: | 582 if args_dict[parsed_arg]: |
| 517 ordered_args.append(parsed_arg) | 583 ordered_args.append(parsed_arg) |
| 518 | 584 |
| 519 return ordered_args | 585 return ordered_args |
| 520 | 586 |
| 521 | 587 |
if __name__ == '__main__':
  # Point the Remote API at the live App Engine services.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  START_DATE = datetime.datetime(2016, 4, 17)
  END_DATE = datetime.datetime(2016, 7, 15)

  # Fetch all try job metadata whose request time falls in
  # [START_DATE, END_DATE).
  categorized_data = WfTryJobData.query(
      WfTryJobData.request_time >= START_DATE,
      WfTryJobData.request_time < END_DATE).fetch()

  # Successively split the data by each requested categorization option.
  for arg in GetArgsInOrder():
    categorized_data = SplitStructByOption(categorized_data, arg)

  # TODO(lijeffrey): Display data in an html page instead of printing.
  PrettyPrint(categorized_data, START_DATE, END_DATE)
| OLD | NEW |