| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 '''Generate stats of CQ usage.''' |
| 6 |
| 7 # sudo apt-get install pip; sudo pip install requests request_cache |
| 8 import argparse |
| 9 import collections |
| 10 from datetime import timedelta, datetime |
| 11 import itertools |
| 12 import numpy |
| 13 import operator |
| 14 import re |
| 15 import requests_cache |
| 16 import time |
| 17 import sys |
| 18 |
| 19 |
class Unbuffered:
    """File-like wrapper that flushes the wrapped stream after every write.

    Attributes not defined here are delegated to the underlying stream.
    """
    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        # Write and immediately flush so progress output is never buffered.
        stream = self.stream
        stream.write(data)
        stream.flush()

    def __getattr__(self, attr):
        # Delegate everything else (fileno, isatty, ...) to the real stream.
        return getattr(self.stream, attr)
# Progress output below uses trailing-comma prints; unbuffered stdout makes
# it appear promptly.
sys.stdout = Unbuffered(sys.stdout)
| 29 |
| 30 |
# Per-project repository URL patterns.  An issue belongs to a project when
# its base_url matches one of that project's regexps (see
# matches_target_base_url).
CONFIG = {
    'chrome': {
        'target_base_url_regexps': [
            re.compile(r'^svn://svn\.chromium\.org/chrome/trunk/src'),
            # pylint: disable=C0301
            re.compile(r'^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$'),
            re.compile(r'^http://src\.chromium\.org/chrome/trunk/src'),
        ],
    },
    'blink': {
        'target_base_url_regexps': [
            re.compile(r'^svn://svn\.chromium\.org/blink/(?:trunk/?)?$'),
            # pylint: disable=C0301
            re.compile(r'^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$'),
            re.compile(r'^http://src\.chromium\.org/blink/(?:trunk/?)?$'),
        ],
    },
    'skia': {
        'target_base_url_regexps': [
            # Dots escaped so '.' matches only a literal dot, consistent
            # with the chrome/blink patterns above (previously unescaped).
            re.compile(r'^http://skia\.googlecode\.com/svn/trunk'),
            re.compile(r'^https://skia\.googlecode\.com/svn/trunk'),
        ],
    },
}
# Selected project key into CONFIG; set in main() from --chrome/--skia
# flags, defaulting to 'blink'.
config_name = None
| 56 |
| 57 |
| 58 def make_throttle_hook(timeout=1.0): |
| 59 # pylint: disable=W0613 |
| 60 def hook(response, *args, **kwargs): |
| 61 print response.url |
| 62 if not getattr(response, 'from_cache', False): |
| 63 print 'sleeping' |
| 64 time.sleep(timeout) |
| 65 return response |
| 66 return hook |
| 67 |
| 68 |
# It seems reasonable to expect that rietveld can handle 20 query-per-second
# load from this script. Ask someone on the infra team before reducing this
# value.
seconds_between_requests = 0.05

# All HTTP responses are cached in a local 'cq' sqlite cache so that re-runs
# are fast and gentle on the server.  install_cache must run before the
# CachedSession is created.
requests_cache.install_cache('cq')
session = requests_cache.CachedSession()
# NOTE(review): dead code -- `args` does not exist at module scope; cache
# clearing is handled in main() instead.
# if args.clear:
#     print 'Clearing cache...'
#     session.cache.clear()
# Throttle only applies to cache misses (see make_throttle_hook).
session.hooks = {'response': make_throttle_hook(seconds_between_requests)}
| 80 |
| 81 |
def fetch_recent_issues(limit, args, cursor=None):
    """Fetch one page of recent issues from Rietveld's search API.

    `cursor` continues a previous query; --modified-after/--modified-before
    flags narrow the search window.  Returns the decoded JSON page.
    """
    params = ['format=json', 'limit=%d' % limit]
    if cursor:
        params.append('cursor=%s' % cursor)
    if args.modified_after:
        params.append('modified_after=%s' % args.modified_after)
    if args.modified_before:
        params.append('modified_before=%s' % args.modified_before)
    search_url = 'https://codereview.chromium.org/search?' + '&'.join(params)
    response = session.get(search_url)
    return response.json()
| 93 |
| 94 |
def get_target_base_url_regexps():
    """Return the base-url regexps for the project selected in config_name."""
    project = CONFIG[config_name]
    return project['target_base_url_regexps']
| 97 |
| 98 |
def matches_target_base_url(url):
    """True if `url` matches any of the selected project's base-url regexps."""
    return any(regexp.match(url)
               for regexp in get_target_base_url_regexps())
| 105 |
| 106 |
def issue_patch_string(issue_id, patch_id):
    """Return the canonical 'issue:patch' identifier for one patchset."""
    return '{0}:{1}'.format(issue_id, patch_id)
| 109 |
| 110 |
def issue_display_url(issue_id):
    """Human-facing Rietveld URL for an issue (used in log messages)."""
    return 'https://codereview.chromium.org/' + str(issue_id)
| 113 |
| 114 |
def issue_url(issue_id):
    """Rietveld JSON API URL for an issue, including its message history."""
    api_base = 'https://codereview.chromium.org/api/'
    return '%s%s?messages=true' % (api_base, issue_id)
| 117 |
| 118 |
def date_from_iso_str(iso_str):
    """Parse an issue timestamp into a datetime.

    Issues have one of two formats:
        2013-10-17 16:43:04.391480
        2013-10-17 16:43:04
    Fractional seconds are tried first; a plain-seconds parse is the
    fallback (and its ValueError propagates for unrecognized input).
    """
    try:
        return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S.%f')
    except ValueError:
        pass
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S')
| 127 |
| 128 |
def round_timedelta(delta):
    """Round a timedelta to whole seconds.

    We never care about the milliseconds when printing timedeltas.
    """
    whole_seconds = round(delta.total_seconds())
    return timedelta(seconds=whole_seconds)
| 132 |
| 133 |
def fetch_issue(issue_id):
    """Fetch an issue's JSON (with messages) plus its cache timestamp.

    Returns (json, fetched_at).  fetched_at is when the cached copy of the
    response was stored, which serves as "now" when replaying cached data.
    """
    url = issue_url(issue_id)
    payload = session.get(url).json()
    # pylint: disable=W0212
    cache_key = session.cache._url_to_key(url)
    _, fetched_at = session.cache.get_response_and_time(cache_key)
    return payload, fetched_at
| 141 |
| 142 |
def format_line(values, aligns, widths):
    """Format one table row.

    Each value is str()-converted, padded by its align function (e.g.
    str.ljust / str.rjust) to its column width, and the columns are joined
    with single spaces.
    """
    # apply() was deprecated in Python 2 and removed in Python 3; calling
    # the align function directly is equivalent.
    return ' '.join(align(str(value), width)
                    for value, align, width in zip(values, aligns, widths))
| 148 |
| 149 |
class CQAttempt(object):
    """A single commit-queue attempt on one patchset of one issue."""

    def __init__(self):
        # Result code, e.g. 'SUCCESS', 'APPLY', 'RESTART', 'IN_PROGRESS'.
        self.result = None
        self.author = None
        self.issue_id = None
        self.patch_id = None
        # Attempt boundaries as datetimes (set by CQAnalyzer).
        self.start_time = None
        self.end_time = None

    @property
    def issue_patch_string(self):
        """'issue:patch' identifier for grouping attempts per patchset."""
        return issue_patch_string(self.issue_id, self.patch_id)

    @property
    def duration(self):
        """Wall-clock time this attempt spent in the CQ."""
        return self.end_time - self.start_time
| 166 |
| 167 |
class CQAnalyzer(object):
    """State machine that replays an issue's Rietveld messages to
    reconstruct commit-queue attempts.

    Feed issues through process_issue(); results accumulate in:
      attempts        -- list of CQAttempt objects
      manual_commits  -- 'issue:patch' strings committed by hand
      no_try_issues   -- issue ids whose description contains NOTRY=true
    """

    def __init__(self):
        self.state = None        # None, or 'START' while an attempt is open
        self.issue = None        # JSON dict of the issue being processed
        self.issue_id = None
        self.author = None
        self.patch_id = None
        self.start_time = None   # when the currently-open attempt started
        self.attempts = []
        self.manual_commits = set()
        self.no_try_issues = set()
        self.now = None          # "now" as of when the issue data was fetched
        # issue_id -> time since CQ start
        self.in_progress = {}

    # Patterns for the commit-bot messages that mark attempt boundaries.
    # pylint: disable=C0301
    start_regexp = re.compile('''CQ is trying da patch. Follow status at
https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)''')
    committed_regexp = re.compile('Change committed as (?P<revision>\d+)')
    # pylint: disable=C0301
    manual_commit_regexp = re.compile('Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)')

    def _reset(self):
        # Clear per-attempt state only; issue-level state is preserved.
        self.state = None
        self.author = None
        self.patch_id = None
        self.start_time = None

    def _start(self, patch_id, start_time, author=None):
        # Open a new CQ attempt.  A second start on the same patchset is a
        # CQ restart: close the previous attempt as 'RESTART' first.
        if self.state == 'START' and self.patch_id == patch_id:
            # print 'Restarted for patchset %s on %s' % (
            #     patch_id, issue_display_url(self.issue_id))
            self._end('RESTART', start_time)
        self.state = 'START'
        self.patch_id = patch_id
        self.author = author
        assert start_time
        self.start_time = start_time

    def _end(self, result, end_time):
        # Close the open attempt with `result`, recording a CQAttempt.
        # An end without a matching start is logged and ignored.
        if self.state != 'START':
            print 'ERROR: %s for %s without start??' % (
                result, issue_display_url(self.issue_id))
            self._reset()
            return
        attempt = CQAttempt()
        attempt.result = result
        attempt.patch_id = self.patch_id
        attempt.issue_id = self.issue_id
        attempt.start_time = self.start_time
        attempt.end_time = end_time
        self.attempts.append(attempt)
        self._reset()

    def _start_and_end(self, result, date):
        # Record a zero-duration attempt (CQ refused to even start).
        # NOTE(review): `result` is passed as _start's patch_id argument;
        # these pseudo-attempts have no real patchset, so this looks
        # intentional -- confirm before changing.
        self._start(result, date)
        self._end(result, date)

    def _manual_commit(self, patch_id):
        # The author landed this patchset by hand, bypassing the CQ.
        self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))

    def process_message(self, message):
        """Classify one Rietveld message and update attempt state."""
        text = message['text']
        date = date_from_iso_str(message['date'])

        # Manual-commit messages can come from any sender, so they are
        # checked before the commit-bot sender filter below.
        match = self.manual_commit_regexp.match(text)
        if match:
            if self.start_time:
                # A manual commit aborts any in-flight CQ attempt.
                self._end('MANUAL_COMMIT',
                          date_from_iso_str(self.issue['modified']))
            patch_number = int(match.group('patch_id'))
            # gcl dcommit talks in terms of patch numbers 1-5, we need to map
            # those to patchset ids:
            if len(self.issue['patchsets']) > (patch_number-1):
                patch_id = self.issue['patchsets'][patch_number-1]
            else:
                print '%s missing patch number %d' % (
                    self.issue_id, patch_number)
                print self.issue['patchsets']
                print 'issue, now = fetch_issue(\'%s\')' % self.issue_id
                patch_id = self.issue['patchsets'][-1]
            self._manual_commit(patch_id)
            return

        # Everything below only applies to messages posted by the CQ itself.
        if message['sender'] != 'commit-bot@chromium.org':
            return

        match = self.start_regexp.match(text)
        if match:
            self._start(match.group('patch_id'), date, match.group('author'))
            return
        match = self.committed_regexp.match(text)
        if match:
            self._end('SUCCESS', date)
            return

        # Known CQ failure messages mapped to attempt result codes.
        cq_end_messages = {
            'TRY': 'Retried try job',
            'APPLY': 'Failed to apply patch',
            'APPLY2': 'Failed to apply the patch',
            'COMPILE': 'Sorry for I got bad news for ya',
            'DESCRIPTION_CHANGED': ('Commit queue rejected this change because '
                                    'the description'),
            # This is too conservative.
            'REVIEWERS_CHANGED': 'List of reviewers changed.',
            # User caused.
            'PATCH_CHANGED': 'Commit queue failed due to new patchset.',
            # FAILED_TO_TRIGGER is a very serious failure, unclear why it
            # happens!
            'FAILED_TO_TRIGGER': 'Failed to trigger a try job on',
            # BINARY_FILE is just a bug in the CQ.
            'BINARY_FILE': 'Can\'t process patch for file',
            'BINARY_FILE2': 'Failed to request the patch to try',
            # Unclear why UPDATE_STEP happens. Likely broken bots, shouldn't
            # fail patches!
            'UPDATE_STEP': 'Step "update" is always a major failure.',
            'BERSERK': 'The commit queue went berserk',
            'INTERNAL_ERROR': 'Commit queue had an internal error.',
        }
        for result, message_text in cq_end_messages.items():
            if text.startswith(message_text):
                self._end(result, date)
                return

        # Messages meaning the CQ never actually started an attempt.
        cq_fail_to_start_messsages = {
            'FAILED_PATCHSET': 'Failed to get patchset properties',
            'NO_COMMENTS': 'No comments yet.',
            'NO_LGTM': 'No LGTM from a valid reviewer yet.',
        }
        for result, message_text in cq_fail_to_start_messsages.items():
            if text.startswith(message_text):
                self._start_and_end(result, date)
                return

        # Unrecognized commit-bot message: log it and close as UNKNOWN.
        print 'Error %s has unknown message: %s' % (
            issue_display_url(self.issue_id), text)
        self._end('UNKNOWN', date)

    def start_issue(self, issue, now):
        """Begin processing one issue's message stream."""
        # We may be dealing with cached data, so cache our concept of "now"
        self.now = now
        self.issue = issue
        self.issue_id = issue['issue']

    def end_issue(self, issue):
        """Finish an issue: classify any attempt still open at the end."""
        issue_id = issue['issue']
        assert issue_id == self.issue_id
        if self.start_time:
            if self.issue['commit']:
                # Commit box still checked: the attempt is in progress now.
                print '%s in progress? %s' % (
                    round_timedelta(self.now - self.start_time),
                    issue_display_url(self.issue_id))
                self._end('IN_PROGRESS', self.now)
            else:
                # There are various reasons why the commit box may have
                # been unchecked, including that the issue may be closed now.
                # If those are not from a user, then that's a bug in rietveld
                # and out of scope for this script. For now we're just ignoring
                # cases where the box was unchecked. We could log an _end()
                # event, but which time would we use? last_modified has at
                # best a weak corrolation with the issue close or CQ checkbox.
                self._reset()
        # _reset() shouldn't be needed, but just in case.
        self._reset()
        self.issue_id = None
        self.issue = None

    def process_issue(self, issue, now):
        """Analyze one issue: record NOTRY status, then replay messages."""
        issue_id = issue['issue']

        if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
            self.no_try_issues.add(issue_id)

        self.start_issue(issue, now)
        for message in issue['messages']:
            self.process_message(message)
            # print '%(date)s: %(text)s' % message
        self.end_issue(issue)

    def print_attempts_table(self):
        """Print attempt counts and mean/median durations grouped by result."""
        result_getter = operator.attrgetter('result')
        # groupby requires its input sorted by the same key.
        attempts = sorted(self.attempts, key=result_getter)

        col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
        col_widths = (5, 20, 16, 16)
        col_headers = ('count', 'result', 'mean duration', 'median duration')
        print
        print ('CQ Attempts by result (there can be more than '
               'one attempt on the same patch)')
        print format_line(col_headers, col_aligns, col_widths)
        print '-' * (sum(col_widths) + len(col_widths) - 1)
        for result, group_itr in itertools.groupby(attempts, result_getter):
            group = list(group_itr)
            durations = map(operator.attrgetter('duration'), group)
            mean = sum(durations, timedelta()) / len(group)
            durations_seconds = map(timedelta.total_seconds, durations)
            median_seconds = numpy.median(durations_seconds)
            median = timedelta(seconds=round(median_seconds))
            values = (len(group), result, round_timedelta(mean), median)
            print format_line(values, col_aligns, col_widths)
        print
| 369 |
| 370 |
def main():
    """Fetch recent Rietveld issues and print CQ usage statistics."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--base-url-stats', action='store_true',
                        help='print base_url matching stats')
    parser.add_argument('--chrome', action='store_true')
    parser.add_argument('--clear', action='store_true',
                        help='clear the cache before starting')
    parser.add_argument('--count', type=int, default=None,
                        help='number of issues to fetch')
    parser.add_argument('--modified-after', default=None,
                        help=('search for issues modified after '
                              'this date YYYY-MM-DD'))
    parser.add_argument('--modified-before', default=None,
                        help=('search for issues modified before '
                              'this date YYYY-MM-DD'))
    parser.add_argument('--progress', help='Show progress', action='store_true')
    parser.add_argument('--skia', action='store_true')
    args = parser.parse_args()

    # Rietveld allows up to 1000 results per search query. I don't know if
    # that's better for the server or not. We'll use 100 to be safe.
    per_request_search_limit = 100
    if args.count:
        per_request_search_limit = min(int(args.count/2), 100)
    # Currently looking at the last 2000 entries.
    target_count = args.count
    if args.clear:
        print 'Clearing cache...'
        session.cache.clear()
    # Select which project's base-url regexps to match (default: blink).
    global config_name
    if args.chrome:
        config_name = 'chrome'
    elif args.skia:
        config_name = 'skia'
    else:
        config_name = 'blink'
    print 'Fetching and analyzing most recent issues...'
    analyzer = CQAnalyzer()

    total_fetch_count = 0
    issue_count = 0
    cursor = None
    url_counts = collections.Counter()
    # Page through search results until an empty page (or target_count hit).
    while True:
        search_results = fetch_recent_issues(per_request_search_limit, args,
                                             cursor)
        if len(search_results['results']) == 0:
            break
        cursor = search_results['cursor']
        total_fetch_count += per_request_search_limit

        if args.base_url_stats:
            url_counts.update([result['base_url']
                               for result in search_results['results']])

        for result in search_results['results']:
            # Skip issues that don't belong to the selected project.
            if not matches_target_base_url(result['base_url']):
                continue
            issue_count += 1
            if args.progress and (issue_count % 100 == 0):
                # Trailing comma: stay on one line, then print the target.
                print issue_count,
                if target_count:
                    print target_count
            issue, now = fetch_issue(result['issue'])
            analyzer.process_issue(issue, now)
        if target_count and issue_count >= target_count:
            break

    if args.base_url_stats:
        print
        for url, count in url_counts.most_common():
            matches_target = matches_target_base_url(url)
            print '%s %s %s' % (matches_target, count, url)

    print
    print 'Found %s issues (in the %s most recent issues).' % (
        issue_count, total_fetch_count)
    # Distinct issues / patchsets that made at least one CQ attempt.
    cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
    cq_issue_count = len(cq_issues)
    cq_patches = set([attempt.issue_patch_string
                      for attempt in analyzer.attempts])
    cq_patch_count = len(cq_patches)
    # NOTE(review): Python 2 integer division for the percentages; also
    # raises ZeroDivisionError when issue_count (or cq_issue_count below)
    # is 0 -- consider guarding.  TODO confirm intended behavior.
    print ('%s issues (%d%%) (%d patches) tried the CQ, '
           'resulting in %d CQ attempts.') % (
        cq_issue_count, 100 * cq_issue_count / issue_count, cq_patch_count,
        len(analyzer.attempts))

    no_try_issues = len(cq_issues & analyzer.no_try_issues)
    gave_up_patches = len(cq_patches & analyzer.manual_commits)
    print ('%s of those (%d%%) were committed manually (gave up). '
           '%s (%d%%) were marked NOTRY.') % (
        gave_up_patches, 100 * gave_up_patches / cq_issue_count,
        no_try_issues, 100 * no_try_issues / cq_issue_count)

    analyzer.print_attempts_table()

    # Group attempts per patchset to measure repeat attempts on patches
    # that eventually landed.
    identifier_getter = operator.attrgetter('issue_patch_string')
    attempts = sorted(analyzer.attempts, key=identifier_getter)
    extra_user_clicks = 0
    eventually_success_durations = []
    eventually_success_attempts = []

    # pylint: disable=W0612
    for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
        group = list(group_itr)
        if group[0].issue_id in analyzer.no_try_issues:
            continue
        results = map(operator.attrgetter('result'), group)
        # print '%s(%s): %s %s' % (identifier, len(group), total_time, results)
        if 'SUCCESS' in results:
            # 'RESTART' events may not require a user re-click?
            user_initiated = filter(lambda result: result != 'RESTART', results)
            extra_user_clicks += len(user_initiated) - 1
            durations = map(operator.attrgetter('duration'), group)
            eventually_success_durations.append(
                sum(durations, timedelta()).total_seconds())
            eventually_success_attempts.append(len(group))

    print ('%s CQ starts (%d%%) were due a previous false-negative '
           '(CQ landed a patch it previously rejected)') % (
        extra_user_clicks, 100 * extra_user_clicks / len(analyzer.attempts))

    # Duration (hours) and attempt-count percentiles for landed patches.
    print 'Patches which eventually land percentiles:'
    for i in [10, 25, 50, 75, 90, 95, 99]:
        eventually_success_durations_p = round(numpy.percentile(
            eventually_success_durations, i)/(60*60), 3)
        eventually_success_attempts_p = numpy.percentile(
            eventually_success_attempts, i)
        print '%d: %.3f hrs, %.3f attempts' % (
            i, eventually_success_durations_p, eventually_success_attempts_p)
| 501 |
| 502 |
# Script entry point; exit status is main()'s return value (None -> 0).
if __name__ == '__main__':
    sys.exit(main())
| OLD | NEW |