#!/usr/bin/python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Generate stats of CQ usage.'''

# sudo apt-get install python-pip; sudo pip install requests requests-cache
import argparse
import collections
from datetime import timedelta, datetime
import itertools
import operator
import re
import sys
import time

import numpy
import requests_cache


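# Progress below is printed a few characters at a time; wrapping stdout so it
# flushes on every write makes output appear immediately, even when piped.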
class Unbuffered(object):
  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    self.stream.write(data)
    self.stream.flush()

  def __getattr__(self, attr):
    return getattr(self.stream, attr)

sys.stdout = Unbuffered(sys.stdout)


CONFIG = {
  'chrome': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/chrome/trunk/src'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/chrome/trunk/src'),
    ],
  },
  'blink': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/blink/(?:trunk/?)?$'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/blink/(?:trunk/?)?$'),
    ],
  },
  'skia': {
    'target_base_url_regexps': [
      # Escape the dots so they don't match arbitrary characters.
      re.compile(r'^http://skia\.googlecode\.com/svn/trunk'),
      re.compile(r'^https://skia\.googlecode\.com/svn/trunk'),
    ],
  },
}
config_name = None


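# requests_cache fires response hooks for cache hits as well as live fetches;
# sleeping only on cache misses throttles real traffic to rietveld while
# cached replays stay fast.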
def make_throttle_hook(timeout=1.0):
  # pylint: disable=W0613
  def hook(response, *args, **kwargs):
    print response.url
    if not getattr(response, 'from_cache', False):
      print 'sleeping'
      time.sleep(timeout)
    return response
  return hook


# It seems reasonable to expect that rietveld can handle a load of 20 queries
# per second from this script. Ask someone on the infra team before reducing
# this value.
seconds_between_requests = 0.05

requests_cache.install_cache('cq')
session = requests_cache.CachedSession()
session.hooks = {'response': make_throttle_hook(seconds_between_requests)}


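# Rietveld's /search endpoint pages its JSON results; each response includes
# a cursor to pass back in to fetch the next page.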
def fetch_recent_issues(limit, args, cursor=None):
  search_url = ('https://codereview.chromium.org/search?format=json&'
                'limit=%d') % limit
  if cursor:
    search_url += '&cursor=%s' % cursor
  if args.modified_after:
    search_url += '&modified_after=%s' % args.modified_after
  if args.modified_before:
    search_url += '&modified_before=%s' % args.modified_before
  response = session.get(search_url)
  return response.json()


def get_target_base_url_regexps():
  return CONFIG[config_name]['target_base_url_regexps']


def matches_target_base_url(url):
  return any(regexp.match(url)
             for regexp in get_target_base_url_regexps())


def issue_patch_string(issue_id, patch_id):
  return '%s:%s' % (issue_id, patch_id)


def issue_display_url(issue_id):
  return 'https://codereview.chromium.org/%s' % issue_id


def issue_url(issue_id):
  return 'https://codereview.chromium.org/api/%s?messages=true' % issue_id


def date_from_iso_str(iso_str):
  # Issue timestamps come in one of two formats:
  #   2013-10-17 16:43:04.391480
  #   2013-10-17 16:43:04
  try:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S.%f')
  except ValueError:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S')


def round_timedelta(delta):
  # We never care about the milliseconds when printing timedeltas:
  return timedelta(seconds=round(delta.total_seconds()))


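# An issue's "now" is when its response was fetched, which (thanks to the
# cache) may be long ago. requests_cache doesn't expose that timestamp
# directly, so we dig it out of the private cache API.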
def fetch_issue(issue_id):
  url = issue_url(issue_id)
  this_json = session.get(url).json()
  # pylint: disable=W0212
  cache_key = session.cache._url_to_key(url)
  _, now = session.cache.get_response_and_time(cache_key)
  return this_json, now


def format_line(values, aligns, widths):
  return ' '.join(align(str(value), width)
                  for value, align, width in zip(values, aligns, widths))


class CQAttempt(object):
  '''One commit-queue attempt on a single patchset.'''

  def __init__(self):
    self.result = None
    self.patch_id = None
    self.issue_id = None
    self.author = None
    self.start_time = None
    self.end_time = None

  @property
  def issue_patch_string(self):
    return issue_patch_string(self.issue_id, self.patch_id)

  @property
  def duration(self):
    return self.end_time - self.start_time


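# CQAnalyzer replays each issue's message stream as a small state machine:
# commit-bot's "trying da patch" message opens an attempt, and the first
# recognized terminal message closes it with a result label.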
class CQAnalyzer(object):
  def __init__(self):
    self.state = None
    self.issue = None
    self.issue_id = None
    self.author = None
    self.patch_id = None
    self.start_time = None
    self.attempts = []
    self.manual_commits = set()
    self.no_try_issues = set()
    self.now = None
    # issue_id -> time since CQ start
    self.in_progress = {}

  # pylint: disable=C0301
  start_regexp = re.compile(r'''CQ is trying da patch. Follow status at
https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)''')
  committed_regexp = re.compile(r'Change committed as (?P<revision>\d+)')
  # pylint: disable=C0301
  manual_commit_regexp = re.compile(r'Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)')

  def _reset(self):
    self.state = None
    self.author = None
    self.patch_id = None
    self.start_time = None

  def _start(self, patch_id, start_time, author=None):
    if self.state == 'START' and self.patch_id == patch_id:
      # print 'Restarted for patchset %s on %s' % (
      #     patch_id, issue_display_url(self.issue_id))
      self._end('RESTART', start_time)
    self.state = 'START'
    self.patch_id = patch_id
    self.author = author
    assert start_time
    self.start_time = start_time

  def _end(self, result, end_time):
    if self.state != 'START':
      print 'ERROR: %s for %s without start??' % (
          result, issue_display_url(self.issue_id))
      self._reset()
      return
    attempt = CQAttempt()
    attempt.result = result
    attempt.patch_id = self.patch_id
    attempt.issue_id = self.issue_id
    attempt.start_time = self.start_time
    attempt.end_time = end_time
    self.attempts.append(attempt)
    self._reset()

  def _start_and_end(self, result, date):
    # These attempts never ran, so there is no patchset to attribute them
    # to; the result label stands in for the patch id.
    self._start(result, date)
    self._end(result, date)

  def _manual_commit(self, patch_id):
    self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))

  def process_message(self, message):
    text = message['text']
    date = date_from_iso_str(message['date'])

    match = self.manual_commit_regexp.match(text)
    if match:
      if self.start_time:
        self._end('MANUAL_COMMIT',
                  date_from_iso_str(self.issue['modified']))
      patch_number = int(match.group('patch_id'))
      # gcl dcommit talks in terms of patch numbers 1-5; we need to map
      # those to patchset ids:
      if len(self.issue['patchsets']) > (patch_number - 1):
        patch_id = self.issue['patchsets'][patch_number - 1]
      else:
        print '%s missing patch number %d' % (
            self.issue_id, patch_number)
        print self.issue['patchsets']
        # Debugging hint: paste this into a REPL to inspect the issue.
        print 'issue, now = fetch_issue(\'%s\')' % self.issue_id
        patch_id = self.issue['patchsets'][-1]
      self._manual_commit(patch_id)
      return

    if message['sender'] != 'commit-bot@chromium.org':
      return

    match = self.start_regexp.match(text)
    if match:
      self._start(match.group('patch_id'), date, match.group('author'))
      return
    match = self.committed_regexp.match(text)
    if match:
      self._end('SUCCESS', date)
      return

    # The prefixes below are matched verbatim against commit-bot output,
    # quirky grammar included.
    cq_end_messages = {
        'TRY': 'Retried try job',
        'APPLY': 'Failed to apply patch',
        'APPLY2': 'Failed to apply the patch',
        'COMPILE': 'Sorry for I got bad news for ya',
        'DESCRIPTION_CHANGED': ('Commit queue rejected this change because '
                                'the description'),
        # This is too conservative.
        'REVIEWERS_CHANGED': 'List of reviewers changed.',
        # FAILED_TO_TRIGGER is a very serious failure; it's unclear why it
        # happens!
        'FAILED_TO_TRIGGER': 'Failed to trigger a try job on',
        # User caused.
        'PATCH_CHANGED': 'Commit queue failed due to new patchset.',
        # BINARY_FILE is just a bug in the CQ.
        'BINARY_FILE': 'Can\'t process patch for file',
        'BINARY_FILE2': 'Failed to request the patch to try',
        # Unclear why UPDATE_STEP happens. Likely broken bots, which
        # shouldn't fail patches!
        'UPDATE_STEP': 'Step "update" is always a major failure.',
        'BERSERK': 'The commit queue went berserk',
        'INTERNAL_ERROR': 'Commit queue had an internal error.',
    }
    for result, message_text in cq_end_messages.items():
      if text.startswith(message_text):
        self._end(result, date)
        return

    cq_fail_to_start_messages = {
        'FAILED_PATCHSET': 'Failed to get patchset properties',
        'NO_COMMENTS': 'No comments yet.',
        'NO_LGTM': 'No LGTM from a valid reviewer yet.',
    }
    for result, message_text in cq_fail_to_start_messages.items():
      if text.startswith(message_text):
        self._start_and_end(result, date)
        return

    print 'ERROR: %s has unknown message: %s' % (
        issue_display_url(self.issue_id), text)
    self._end('UNKNOWN', date)

  def start_issue(self, issue, now):
    # We may be dealing with cached data, so cache our concept of "now".
    self.now = now
    self.issue = issue
    self.issue_id = issue['issue']

  def end_issue(self, issue):
    issue_id = issue['issue']
    assert issue_id == self.issue_id
    if self.start_time:
      if self.issue['commit']:
        print '%s in progress? %s' % (
            round_timedelta(self.now - self.start_time),
            issue_display_url(self.issue_id))
        self._end('IN_PROGRESS', self.now)
      else:
        # There are various reasons why the commit box may have been
        # unchecked, including that the issue may be closed by now. If those
        # aren't from a user, that's a bug in rietveld and out of scope for
        # this script. For now we just ignore cases where the box was
        # unchecked. We could log an _end() event, but which time would we
        # use? last_modified has at best a weak correlation with the issue
        # close or the CQ checkbox.
        self._reset()
    # _reset() shouldn't be needed, but just in case.
    self._reset()
    self.issue_id = None
    self.issue = None

  def process_issue(self, issue, now):
    issue_id = issue['issue']

    if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
      self.no_try_issues.add(issue_id)

    self.start_issue(issue, now)
    for message in issue['messages']:
      self.process_message(message)
      # print '%(date)s: %(text)s' % message
    self.end_issue(issue)

  def print_attempts_table(self):
    result_getter = operator.attrgetter('result')
    attempts = sorted(self.attempts, key=result_getter)

    col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
    col_widths = (5, 20, 16, 16)
    col_headers = ('count', 'result', 'mean duration', 'median duration')
    print
    print ('CQ Attempts by result (there can be more than '
           'one attempt on the same patch)')
    print format_line(col_headers, col_aligns, col_widths)
    print '-' * (sum(col_widths) + len(col_widths) - 1)
    for result, group_itr in itertools.groupby(attempts, result_getter):
      group = list(group_itr)
      durations = map(operator.attrgetter('duration'), group)
      mean = sum(durations, timedelta()) / len(group)
      durations_seconds = map(timedelta.total_seconds, durations)
      median_seconds = numpy.median(durations_seconds)
      median = timedelta(seconds=round(median_seconds))
      values = (len(group), result, round_timedelta(mean), median)
      print format_line(values, col_aligns, col_widths)
    print


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--base-url-stats', action='store_true',
                      help='print base_url matching stats')
  parser.add_argument('--chrome', action='store_true')
  parser.add_argument('--clear', action='store_true',
                      help='clear the cache before starting')
  parser.add_argument('--count', type=int, default=None,
                      help='number of issues to fetch')
  parser.add_argument('--modified-after', default=None,
                      help=('search for issues modified after '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--modified-before', default=None,
                      help=('search for issues modified before '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--progress', action='store_true',
                      help='show progress')
  parser.add_argument('--skia', action='store_true')
  args = parser.parse_args()

  # Rietveld allows up to 1000 results per search query, but it's unclear
  # whether larger pages are any easier on the server. Use 100 to be safe.
  per_request_search_limit = 100
  if args.count:
    per_request_search_limit = min(int(args.count / 2), 100)
  # A target_count of None means no limit: keep going until the search
  # runs dry.
  target_count = args.count
  if args.clear:
    print 'Clearing cache...'
    session.cache.clear()
  global config_name
  if args.chrome:
    config_name = 'chrome'
  elif args.skia:
    config_name = 'skia'
  else:
    config_name = 'blink'
  print 'Fetching and analyzing most recent issues...'
  analyzer = CQAnalyzer()

  total_fetch_count = 0
  issue_count = 0
  cursor = None
  url_counts = collections.Counter()
  while True:
    search_results = fetch_recent_issues(per_request_search_limit, args,
                                         cursor)
    if len(search_results['results']) == 0:
      break
    cursor = search_results['cursor']
    total_fetch_count += per_request_search_limit

    if args.base_url_stats:
      url_counts.update([result['base_url']
                         for result in search_results['results']])

    for result in search_results['results']:
      if not matches_target_base_url(result['base_url']):
        continue
      issue_count += 1
      if args.progress and (issue_count % 100 == 0):
        print issue_count,
        if target_count:
          print target_count
      issue, now = fetch_issue(result['issue'])
      analyzer.process_issue(issue, now)
      if target_count and issue_count >= target_count:
        break
    # The break above only exits the for loop; stop paging as well once the
    # target is reached.
    if target_count and issue_count >= target_count:
      break

  if args.base_url_stats:
    print
    for url, count in url_counts.most_common():
      matches_target = matches_target_base_url(url)
      print '%s %s %s' % (matches_target, count, url)

  print
  print 'Found %s issues (in the %s most recent issues).' % (
      issue_count, total_fetch_count)
  cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
  cq_issue_count = len(cq_issues)
  cq_patches = set([attempt.issue_patch_string
                    for attempt in analyzer.attempts])
  cq_patch_count = len(cq_patches)
  print ('%s issues (%d%%) (%d patches) tried the CQ, '
         'resulting in %d CQ attempts.') % (
      cq_issue_count, 100 * cq_issue_count / issue_count, cq_patch_count,
      len(analyzer.attempts))

  no_try_issues = len(cq_issues & analyzer.no_try_issues)
  gave_up_patches = len(cq_patches & analyzer.manual_commits)
  print ('%s of those (%d%%) were committed manually (gave up). '
         '%s (%d%%) were marked NOTRY.') % (
      gave_up_patches, 100 * gave_up_patches / cq_issue_count,
      no_try_issues, 100 * no_try_issues / cq_issue_count)

  analyzer.print_attempts_table()

  identifier_getter = operator.attrgetter('issue_patch_string')
  attempts = sorted(analyzer.attempts, key=identifier_getter)
  extra_user_clicks = 0
  eventually_success_durations = []
  eventually_success_attempts = []

  # pylint: disable=W0612
  for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
    group = list(group_itr)
    if group[0].issue_id in analyzer.no_try_issues:
      continue
    results = map(operator.attrgetter('result'), group)
    # print '%s(%s): %s %s' % (identifier, len(group), total_time, results)
    if 'SUCCESS' in results:
      # 'RESTART' events may not require a user re-click?
      user_initiated = filter(lambda result: result != 'RESTART', results)
      extra_user_clicks += len(user_initiated) - 1
      durations = map(operator.attrgetter('duration'), group)
      eventually_success_durations.append(
          sum(durations, timedelta()).total_seconds())
      eventually_success_attempts.append(len(group))

  print ('%s CQ starts (%d%%) were due to a previous false negative '
         '(the CQ landed a patch it previously rejected)') % (
      extra_user_clicks, 100 * extra_user_clicks / len(analyzer.attempts))

  print 'Percentiles for patches which eventually land:'
  for i in [10, 25, 50, 75, 90, 95, 99]:
    eventually_success_durations_p = round(numpy.percentile(
        eventually_success_durations, i) / (60 * 60), 3)
    eventually_success_attempts_p = numpy.percentile(
        eventually_success_attempts, i)
    print '%d: %.3f hrs, %.3f attempts' % (
        i, eventually_success_durations_p, eventually_success_attempts_p)


if __name__ == '__main__':
  sys.exit(main())