| Index: tools/stats.py
|
| diff --git a/tools/stats.py b/tools/stats.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..1b906282e10c66509c6880a8746ef3d43835df9d
|
| --- /dev/null
|
| +++ b/tools/stats.py
|
| @@ -0,0 +1,504 @@
|
| +#!/usr/bin/python
|
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +'''Generate stats of CQ usage.'''
|
| +
|
| +# sudo apt-get install python-pip; sudo pip install requests requests-cache
|
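| +# Example run (flags are defined in main() below; blink is the default):
|
| +#   tools/stats.py --chrome --count 500 --progress
|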
| +import argparse
|
| +import collections
|
| +from datetime import timedelta, datetime
|
| +import itertools
|
| +import numpy
|
| +import operator
|
| +import re
|
| +import requests_cache
|
| +import time
|
| +import sys
|
| +
|
| +
|
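| +# Wraps a stream and flushes after every write so progress output appears
|
| +# immediately even when stdout is piped.
|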
| +class Unbuffered(object):
|
| + def __init__(self, stream):
|
| + self.stream = stream
|
| + def write(self, data):
|
| + self.stream.write(data)
|
| + self.stream.flush()
|
| + def __getattr__(self, attr):
|
| + return getattr(self.stream, attr)
|
| +sys.stdout = Unbuffered(sys.stdout)
|
| +
|
| +
|
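| +# Maps each supported project to the base_url regexps used to pick its
|
| +# issues out of the Rietveld search results.
|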
| +CONFIG = {
|
| + 'chrome': {
|
| + 'target_base_url_regexps': [
|
| + re.compile(r'^svn://svn\.chromium\.org/chrome/trunk/src'),
|
| + # pylint: disable=C0301
|
| + re.compile(r'^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$'),
|
| + re.compile(r'^http://src\.chromium\.org/chrome/trunk/src'),
|
| + ],
|
| + },
|
| + 'blink': {
|
| + 'target_base_url_regexps': [
|
| + re.compile(r'^svn://svn\.chromium\.org/blink/(?:trunk/?)?$'),
|
| + # pylint: disable=C0301
|
| + re.compile(r'^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$'),
|
| + re.compile(r'^http://src\.chromium\.org/blink/(?:trunk/?)?$'),
|
| + ],
|
| + },
|
| + 'skia': {
|
| + 'target_base_url_regexps': [
|
| +      re.compile(r'^http://skia\.googlecode\.com/svn/trunk'),
|
| +      re.compile(r'^https://skia\.googlecode\.com/svn/trunk'),
|
| + ],
|
| + },
|
| +}
|
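| +# Set in main() from the --chrome/--skia flags; defaults to 'blink'.
|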
| +config_name = None
|
| +
|
| +
|
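| +# Returns a response hook that sleeps after every uncached response,
|
| +# throttling how fast this script hits Rietveld (see
|
| +# seconds_between_requests below).
|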
| +def make_throttle_hook(timeout=1.0):
|
| + # pylint: disable=W0613
|
| + def hook(response, *args, **kwargs):
|
| + print response.url
|
| + if not getattr(response, 'from_cache', False):
|
| + print 'sleeping'
|
| + time.sleep(timeout)
|
| + return response
|
| + return hook
|
| +
|
| +
|
| +# It seems reasonable to expect that rietveld can handle 20 query-per-second
|
| +# load from this script. Ask someone on the infra team before reducing this
|
| +# value.
|
| +seconds_between_requests = 0.05
|
| +
|
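| +# Cache all HTTP responses in a local 'cq' cache so re-runs mostly hit disk
|
| +# instead of Rietveld.
|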
| +requests_cache.install_cache('cq')
|
| +session = requests_cache.CachedSession()
|
| +# if args.clear:
|
| +# print 'Clearing cache...'
|
| +# session.cache.clear()
|
| +session.hooks = {'response': make_throttle_hook(seconds_between_requests)}
|
| +
|
| +
|
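| +# Fetches one page of recent issues from Rietveld's JSON search endpoint;
|
| +# pass the returned cursor back in to get the next page.
|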
| +def fetch_recent_issues(limit, args, cursor=None):
|
| + search_url = ('https://codereview.chromium.org/search?format=json&'
|
| + 'limit=%d') % limit
|
| + if cursor:
|
| + search_url += '&cursor=%s' % cursor
|
| + if args.modified_after:
|
| + search_url += '&modified_after=%s' % args.modified_after
|
| + if args.modified_before:
|
| + search_url += '&modified_before=%s' % args.modified_before
|
| + response = session.get(search_url)
|
| + return response.json()
|
| +
|
| +
|
| +def get_target_base_url_regexps():
|
| + return CONFIG[config_name]['target_base_url_regexps']
|
| +
|
| +
|
| +def matches_target_base_url(url):
|
| + for regexp in get_target_base_url_regexps():
|
| + match = regexp.match(url)
|
| + if match:
|
| + return True
|
| + return False
|
| +
|
| +
|
| +def issue_patch_string(issue_id, patch_id):
|
| + return '%s:%s' % (issue_id, patch_id)
|
| +
|
| +
|
| +def issue_display_url(issue_id):
|
| + return 'https://codereview.chromium.org/%s' % issue_id
|
| +
|
| +
|
| +def issue_url(issue_id):
|
| + return 'https://codereview.chromium.org/api/%s?messages=true' % issue_id
|
| +
|
| +
|
| +def date_from_iso_str(iso_str):
|
| + # Issues have one of two formats:
|
| + # 2013-10-17 16:43:04.391480
|
| + # 2013-10-17 16:43:04
|
| + try:
|
| + return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S.%f')
|
| + except ValueError:
|
| + return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S')
|
| +
|
| +
|
| +def round_timedelta(delta):
|
| + # We never care about the milliseconds when printing timedeltas:
|
| + return timedelta(seconds=round(delta.total_seconds()))
|
| +
|
| +
|
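| +# Fetches the full issue JSON (including messages) plus the time the cached
|
| +# response was stored; the analyzer uses that timestamp as "now".
|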
| +def fetch_issue(issue_id):
|
| + url = issue_url(issue_id)
|
| + this_json = session.get(url).json()
|
| + # pylint: disable=W0212
|
| + cache_key = session.cache._url_to_key(url)
|
| + _, now = session.cache.get_response_and_time(cache_key)
|
| + return this_json, now
|
| +
|
| +
|
| +def format_line(values, aligns, widths):
|
| + values = map(str, values)
|
| + args = zip(values, aligns, widths)
|
| +  return ' '.join([align(value, width)
|
| + for value, align, width in args])
|
| +
|
| +
|
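| +# One start-to-end run of a single patchset through the CQ.
|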
| +class CQAttempt(object):
|
| + def __init__(self):
|
| + self.result = None
|
| + self.patch_id = None
|
| + self.issue_id = None
|
| + self.author = None
|
| + self.start_time = None
|
| + self.end_time = None
|
| +
|
| + @property
|
| + def issue_patch_string(self):
|
| + return issue_patch_string(self.issue_id, self.patch_id)
|
| +
|
| + @property
|
| + def duration(self):
|
| + return self.end_time - self.start_time
|
| +
|
| +
|
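| +# A small state machine: fed one issue at a time, it walks the issue's
|
| +# messages and records a CQAttempt for each CQ start/end pair it finds.
|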
| +class CQAnalyzer(object):
|
| + def __init__(self):
|
| + self.state = None
|
| + self.issue = None
|
| + self.issue_id = None
|
| + self.author = None
|
| + self.patch_id = None
|
| + self.start_time = None
|
| + self.attempts = []
|
| + self.manual_commits = set()
|
| + self.no_try_issues = set()
|
| + self.now = None
|
| + # issue_id -> time since CQ start
|
| + self.in_progress = {}
|
| +
|
| + # pylint: disable=C0301
|
| +  start_regexp = re.compile(r'''CQ is trying da patch. Follow status at
|
| +https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)''')
|
| +  committed_regexp = re.compile(r'Change committed as (?P<revision>\d+)')
|
| +  # pylint: disable=C0301
|
| +  manual_commit_regexp = re.compile(r'Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)')
|
| +
|
| + def _reset(self):
|
| + self.state = None
|
| + self.author = None
|
| + self.patch_id = None
|
| + self.start_time = None
|
| +
|
| + def _start(self, patch_id, start_time, author=None):
|
| + if self.state == 'START' and self.patch_id == patch_id:
|
| + # print 'Restarted for patchset %s on %s' % (
|
| + # patch_id, issue_display_url(self.issue_id))
|
| + self._end('RESTART', start_time)
|
| + self.state = 'START'
|
| + self.patch_id = patch_id
|
| + self.author = author
|
| + assert start_time
|
| + self.start_time = start_time
|
| +
|
| + def _end(self, result, end_time):
|
| + if self.state != 'START':
|
| + print 'ERROR: %s for %s without start??' % (
|
| + result, issue_display_url(self.issue_id))
|
| + self._reset()
|
| + return
|
| + attempt = CQAttempt()
|
| + attempt.result = result
|
| + attempt.patch_id = self.patch_id
|
| + attempt.issue_id = self.issue_id
|
| + attempt.start_time = self.start_time
|
| + attempt.end_time = end_time
|
| + self.attempts.append(attempt)
|
| + self._reset()
|
| +
|
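| +  # Records a zero-duration attempt for messages where the CQ refused to
|
| +  # start (e.g. no LGTM yet); the result label also stands in for patch_id.
|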
| + def _start_and_end(self, result, date):
|
| + self._start(result, date)
|
| + self._end(result, date)
|
| +
|
| + def _manual_commit(self, patch_id):
|
| + self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))
|
| +
|
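| +  # Classifies one Rietveld message: manual commits are handled first, then
|
| +  # (for commit-bot messages only) CQ starts, commits, and failure messages.
|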
| + def process_message(self, message):
|
| + text = message['text']
|
| + date = date_from_iso_str(message['date'])
|
| +
|
| + match = self.manual_commit_regexp.match(text)
|
| + if match:
|
| + if self.start_time:
|
| + self._end('MANUAL_COMMIT',
|
| + date_from_iso_str(self.issue['modified']))
|
| + patch_number = int(match.group('patch_id'))
|
| + # gcl dcommit talks in terms of patch numbers 1-5, we need to map
|
| + # those to patchset ids:
|
| + if len(self.issue['patchsets']) > (patch_number-1):
|
| + patch_id = self.issue['patchsets'][patch_number-1]
|
| + else:
|
| + print '%s missing patch number %d' % (
|
| + self.issue_id, patch_number)
|
| + print self.issue['patchsets']
|
| + print 'issue, now = fetch_issue(\'%s\')' % self.issue_id
|
| + patch_id = self.issue['patchsets'][-1]
|
| + self._manual_commit(patch_id)
|
| + return
|
| +
|
| + if message['sender'] != 'commit-bot@chromium.org':
|
| + return
|
| +
|
| + match = self.start_regexp.match(text)
|
| + if match:
|
| + self._start(match.group('patch_id'), date, match.group('author'))
|
| + return
|
| + match = self.committed_regexp.match(text)
|
| + if match:
|
| + self._end('SUCCESS', date)
|
| + return
|
| +
|
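| +    # Prefixes of commit-bot messages that end an attempt, keyed by the
|
| +    # result label reported for them.
|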
| + cq_end_messages = {
|
| + 'TRY': 'Retried try job',
|
| + 'APPLY': 'Failed to apply patch',
|
| + 'APPLY2': 'Failed to apply the patch',
|
| + 'COMPILE': 'Sorry for I got bad news for ya',
|
| + 'DESCRIPTION_CHANGED': ('Commit queue rejected this change because '
|
| + 'the description'),
|
| + # This is too conservative.
|
| + 'REVIEWERS_CHANGED': 'List of reviewers changed.',
|
| + # User caused.
|
| + 'PATCH_CHANGED': 'Commit queue failed due to new patchset.',
|
| + # FAILED_TO_TRIGGER is a very serious failure, unclear why it
|
| + # happens!
|
| + 'FAILED_TO_TRIGGER': 'Failed to trigger a try job on',
|
| + # BINARY_FILE is just a bug in the CQ.
|
| + 'BINARY_FILE': 'Can\'t process patch for file',
|
| + 'BINARY_FILE2': 'Failed to request the patch to try',
|
| + # Unclear why UPDATE_STEP happens. Likely broken bots, shouldn't
|
| + # fail patches!
|
| + 'UPDATE_STEP': 'Step "update" is always a major failure.',
|
| + 'BERSERK': 'The commit queue went berserk',
|
| + 'INTERNAL_ERROR': 'Commit queue had an internal error.',
|
| + }
|
| + for result, message_text in cq_end_messages.items():
|
| + if text.startswith(message_text):
|
| + self._end(result, date)
|
| + return
|
| +
|
| +    cq_fail_to_start_messages = {
|
| + 'FAILED_PATCHSET': 'Failed to get patchset properties',
|
| + 'NO_COMMENTS': 'No comments yet.',
|
| + 'NO_LGTM': 'No LGTM from a valid reviewer yet.',
|
| + }
|
| +    for result, message_text in cq_fail_to_start_messages.items():
|
| + if text.startswith(message_text):
|
| + self._start_and_end(result, date)
|
| + return
|
| +
|
| +    print 'ERROR: %s has unknown message: %s' % (
|
| + issue_display_url(self.issue_id), text)
|
| + self._end('UNKNOWN', date)
|
| +
|
| + def start_issue(self, issue, now):
|
| + # We may be dealing with cached data, so cache our concept of "now"
|
| + self.now = now
|
| + self.issue = issue
|
| + self.issue_id = issue['issue']
|
| +
|
| + def end_issue(self, issue):
|
| + issue_id = issue['issue']
|
| + assert issue_id == self.issue_id
|
| + if self.start_time:
|
| + if self.issue['commit']:
|
| + print '%s in progress? %s' % (
|
| + round_timedelta(self.now - self.start_time),
|
| + issue_display_url(self.issue_id))
|
| + self._end('IN_PROGRESS', self.now)
|
| + else:
|
| + # There are various reasons why the commit box may have
|
| + # been unchecked, including that the issue may be closed now.
|
| + # If those are not from a user, then that's a bug in rietveld
|
| + # and out of scope for this script. For now we're just ignoring
|
| + # cases where the box was unchecked. We could log an _end()
|
| + # event, but which time would we use? last_modified has at
|
| +        # best a weak correlation with the issue close or CQ checkbox.
|
| + self._reset()
|
| + # _reset() shouldn't be needed, but just in case.
|
| + self._reset()
|
| + self.issue_id = None
|
| + self.issue = None
|
| +
|
| + def process_issue(self, issue, now):
|
| + issue_id = issue['issue']
|
| +
|
| + if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
|
| + self.no_try_issues.add(issue_id)
|
| +
|
| + self.start_issue(issue, now)
|
| + for message in issue['messages']:
|
| + self.process_message(message)
|
| + # print '%(date)s: %(text)s' % message
|
| + self.end_issue(issue)
|
| +
|
| + def print_attempts_table(self):
|
| + result_getter = operator.attrgetter('result')
|
| + attempts = sorted(self.attempts, key=result_getter)
|
| +
|
| + col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
|
| + col_widths = (5, 20, 16, 16)
|
| + col_headers = ('count', 'result', 'mean duration', 'median duration')
|
| + print
|
| + print ('CQ Attempts by result (there can be more than '
|
| + 'one attempt on the same patch)')
|
| + print format_line(col_headers, col_aligns, col_widths)
|
| + print '-' * (sum(col_widths) + len(col_widths) - 1)
|
| + for result, group_itr in itertools.groupby(attempts, result_getter):
|
| + group = list(group_itr)
|
| + durations = map(operator.attrgetter('duration'), group)
|
| + mean = sum(durations, timedelta()) / len(group)
|
| + durations_seconds = map(timedelta.total_seconds, durations)
|
| + median_seconds = numpy.median(durations_seconds)
|
| + median = timedelta(seconds=round(median_seconds))
|
| + values = (len(group), result, round_timedelta(mean), median)
|
| + print format_line(values, col_aligns, col_widths)
|
| + print
|
| +
|
| +
|
| +def main():
|
| + parser = argparse.ArgumentParser()
|
| + parser.add_argument('--base-url-stats', action='store_true',
|
| + help='print base_url matching stats')
|
| + parser.add_argument('--chrome', action='store_true')
|
| + parser.add_argument('--clear', action='store_true',
|
| + help='clear the cache before starting')
|
| + parser.add_argument('--count', type=int, default=None,
|
| + help='number of issues to fetch')
|
| + parser.add_argument('--modified-after', default=None,
|
| + help=('search for issues modified after '
|
| + 'this date YYYY-MM-DD'))
|
| + parser.add_argument('--modified-before', default=None,
|
| + help=('search for issues modified before '
|
| + 'this date YYYY-MM-DD'))
|
| + parser.add_argument('--progress', help='Show progress', action='store_true')
|
| + parser.add_argument('--skia', action='store_true')
|
| + args = parser.parse_args()
|
| +
|
| +  # Rietveld allows up to 1000 results per search query, but it is unclear
|
| +  # whether larger pages are any easier on the server, so use 100 to be safe.
|
| + per_request_search_limit = 100
|
| + if args.count:
|
| + per_request_search_limit = min(int(args.count/2), 100)
|
| +  # Without --count, keep going until the search results run out.
|
| + target_count = args.count
|
| + if args.clear:
|
| + print 'Clearing cache...'
|
| + session.cache.clear()
|
| + global config_name
|
| + if args.chrome:
|
| + config_name = 'chrome'
|
| + elif args.skia:
|
| + config_name = 'skia'
|
| + else:
|
| + config_name = 'blink'
|
| + print 'Fetching and analyzing most recent issues...'
|
| + analyzer = CQAnalyzer()
|
| +
|
| + total_fetch_count = 0
|
| + issue_count = 0
|
| + cursor = None
|
| + url_counts = collections.Counter()
|
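| +  # Page through the search results until Rietveld returns an empty page or
|
| +  # we reach the requested --count.
|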
| + while True:
|
| + search_results = fetch_recent_issues(per_request_search_limit, args,
|
| + cursor)
|
| + if len(search_results['results']) == 0:
|
| + break
|
| + cursor = search_results['cursor']
|
| + total_fetch_count += per_request_search_limit
|
| +
|
| + if args.base_url_stats:
|
| + url_counts.update([result['base_url']
|
| + for result in search_results['results']])
|
| +
|
| + for result in search_results['results']:
|
| + if not matches_target_base_url(result['base_url']):
|
| + continue
|
| + issue_count += 1
|
| + if args.progress and (issue_count % 100 == 0):
|
| + print issue_count,
|
| + if target_count:
|
| + print target_count
|
| + issue, now = fetch_issue(result['issue'])
|
| + analyzer.process_issue(issue, now)
|
| + if target_count and issue_count >= target_count:
|
| + break
|
| +
|
| + if args.base_url_stats:
|
| + print
|
| + for url, count in url_counts.most_common():
|
| + matches_target = matches_target_base_url(url)
|
| + print '%s %s %s' % (matches_target, count, url)
|
| +
|
| + print
|
| + print 'Found %s issues (in the %s most recent issues).' % (
|
| + issue_count, total_fetch_count)
|
| + cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
|
| + cq_issue_count = len(cq_issues)
|
| + cq_patches = set([attempt.issue_patch_string
|
| + for attempt in analyzer.attempts])
|
| + cq_patch_count = len(cq_patches)
|
| + print ('%s issues (%d%%) (%d patches) tried the CQ, '
|
| + 'resulting in %d CQ attempts.') % (
|
| + cq_issue_count, 100 * cq_issue_count / issue_count, cq_patch_count,
|
| + len(analyzer.attempts))
|
| +
|
| + no_try_issues = len(cq_issues & analyzer.no_try_issues)
|
| + gave_up_patches = len(cq_patches & analyzer.manual_commits)
|
| + print ('%s of those (%d%%) were committed manually (gave up). '
|
| + '%s (%d%%) were marked NOTRY.') % (
|
| + gave_up_patches, 100 * gave_up_patches / cq_issue_count,
|
| + no_try_issues, 100 * no_try_issues / cq_issue_count)
|
| +
|
| + analyzer.print_attempts_table()
|
| +
|
| + identifier_getter = operator.attrgetter('issue_patch_string')
|
| + attempts = sorted(analyzer.attempts, key=identifier_getter)
|
| + extra_user_clicks = 0
|
| + eventually_success_durations = []
|
| + eventually_success_attempts = []
|
| +
|
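| +  # Group attempts by issue:patchset. For a patch that eventually landed,
|
| +  # every attempt before the final success (restarts excluded) was a false
|
| +  # negative that cost the author another commit-box click; count those and
|
| +  # sum the total time spent.
|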
| + # pylint: disable=W0612
|
| + for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
|
| + group = list(group_itr)
|
| + if group[0].issue_id in analyzer.no_try_issues:
|
| + continue
|
| + results = map(operator.attrgetter('result'), group)
|
| + # print '%s(%s): %s %s' % (identifier, len(group), total_time, results)
|
| + if 'SUCCESS' in results:
|
| + # 'RESTART' events may not require a user re-click?
|
| + user_initiated = filter(lambda result: result != 'RESTART', results)
|
| + extra_user_clicks += len(user_initiated) - 1
|
| + durations = map(operator.attrgetter('duration'), group)
|
| + eventually_success_durations.append(
|
| + sum(durations, timedelta()).total_seconds())
|
| + eventually_success_attempts.append(len(group))
|
| +
|
| +  print ('%s CQ starts (%d%%) were due to a previous false-negative '
|
| + '(CQ landed a patch it previously rejected)') % (
|
| + extra_user_clicks, 100 * extra_user_clicks / len(analyzer.attempts))
|
| +
|
| +  print 'Percentiles for patches which eventually land:'
|
| + for i in [10, 25, 50, 75, 90, 95, 99]:
|
| + eventually_success_durations_p = round(numpy.percentile(
|
| + eventually_success_durations, i)/(60*60), 3)
|
| + eventually_success_attempts_p = numpy.percentile(
|
| + eventually_success_attempts, i)
|
| + print '%d: %.3f hrs, %.3f attempts' % (
|
| + i, eventually_success_durations_p, eventually_success_attempts_p)
|
| +
|
| +
|
| +if __name__ == '__main__':
|
| + sys.exit(main())
|
|
|