Chromium Code Reviews

Unified Diff: infra/tools/cq_stats/cq_stats.py

Issue 1125653004: Check in cq_stats.py (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: use utc (created 5 years, 7 months ago)
Index: infra/tools/cq_stats/cq_stats.py
diff --git a/infra/tools/cq_stats/cq_stats.py b/infra/tools/cq_stats/cq_stats.py
new file mode 100755
index 0000000000000000000000000000000000000000..2f296761a10cd32e86bd2103686d31cc4f6ddb4a
--- /dev/null
+++ b/infra/tools/cq_stats/cq_stats.py
@@ -0,0 +1,1206 @@
+#!/usr/bin/python
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Generate stats of CQ usage."""
+
+import argparse
+import calendar
+import collections
+import datetime
+import json
+import logging
+import numbers
+import re
+import subprocess
+import sys
+import time
+import urllib
+import urllib2
+import urlparse
+from multiprocessing.pool import ThreadPool
+from xml.etree import ElementTree
+
+import dateutil.parser
+import dateutil.tz
+import numpy
+
+import infra.libs.logs
+
+
+STATS_URL = 'http://chromium-cq-status.appspot.com'
+# Interpolate with the project name, e.g. TREE_STATUS_URL % 'chromium'.
+TREE_STATUS_URL = 'http://%s-status.appspot.com'
+PROJECTS = {
+ 'chromium': {
+ 'tree-status': TREE_STATUS_URL % 'chromium',
+ 'type': 'git',
+ 'repo': 'https://chromium.googlesource.com/chromium/src',
+ },
+ 'blink': {
+ 'tree-status': TREE_STATUS_URL % 'blink',
+ 'type': 'svn',
+ 'repo': 'svn://svn.chromium.org/blink/trunk/',
+ },
+}
+# Map of intervals to minutes.
+INTERVALS = {
+ 'week': 60 * 24 * 7,
+ 'day': 60 * 24,
+ 'hour': 60,
+ '15min': 15,
+}
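+# INTERVALS both sizes the stats window and locates the preceding
+# window used for the "Prev." comparison columns in the report.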
+VALID_REASONS = collections.OrderedDict([
+ ('manual-cancel', {
+ 'item': 'stopped manually',
+ 'message': 'stopped manually (CQ box was unchecked)',
+ }),
+ ('missing-lgtm', {
+ 'item': 'missed LGTM',
+ 'message': 'are missing LGTM',
+ }),
+ ('not-lgtm', {
+ 'item': 'NOT LGTMs',
+ 'message': 'have been disapproved (NOT LGTM)',
+ }),
+ ('failed-patch', {
+ 'item': 'failures',
+ 'message': 'failed to apply patch',
+ }),
+ ('invalid-delimiter', {
+ 'item': 'errors',
+ 'message': 'have incorrect CQ_EXTRA_TRYBOTS flag',
+ }),
+ ('failed-presubmit-bot', {
+ 'item': 'failures',
+ 'message': 'failed presubmit bot (often due to missing OWNERS LGTM)',
+ }),
+])
+FLAKY_REASONS = collections.OrderedDict([
+ ('failed-commit', {
+ 'item': 'failures',
+ 'message': 'failed to commit',
+ }),
+ ('failed-jobs', {
+ 'item': 'failures',
+ 'message': 'failed jobs (excluding presubmit)',
+ }),
+ ('failed-to-trigger', {
+ 'item': 'failures',
+ 'message': 'failed to trigger jobs',
+ }),
+ ('failed-presubmit-check', {
+ 'item': 'failures',
+ 'message': 'failed presubmit check',
+ }),
+])
+KNOWN_REASONS = collections.OrderedDict()
+KNOWN_REASONS.update(VALID_REASONS)
+KNOWN_REASONS.update(FLAKY_REASONS)
+
+REASONS = collections.OrderedDict()
+REASONS.update(KNOWN_REASONS)
+REASONS['failed-unknown'] = {
+ 'item': 'failures',
+ 'message': 'failed for any other reason',
+}
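+# Summary of the taxonomy above: VALID_REASONS are genuine rejections
+# (CQ was right to reject the patch), FLAKY_REASONS are infrastructure
+# or flake failures that count toward false rejections, and
+# 'failed-unknown' is the catch-all for attempts that failed without
+# matching any known reason.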
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description=sys.modules['__main__'].__doc__)
+ parser.add_argument(
+ '--project',
+ required=True,
+ choices=PROJECTS.keys(),
+ help='Collect stats about this project.')
+ parser.add_argument(
+ '--bot', type=str, dest='bots',
+ action='append',
+ default=['blink-deps-roller@chromium.org',
+ 'chrome-admin@google.com',
+ 'chrome-release@google.com',
+ 'chromeos-commit-bot@chromium.org',
+ 'skia-deps-roller@chromium.org',
+ ],
+ help=('Add an author to be treated as a bot. '
+ 'Repeat to add several bots. Default: %(default)s.'))
+ parser.add_argument(
+ '--seq', action='store_true',
+ help='Run everything sequentially for debugging.')
+ parser.add_argument(
+ '--thread-pool', type=int, default=200,
+ help='Fetch data using this many parallel threads. Default=%(default)s.')
+ parser.add_argument(
+ '--list-rejections', action='store_true',
+ help='List rejected CLs and reasons for rejection.')
+ parser.add_argument(
+ '--list-false-rejections', action='store_true',
+ help='List CLs that were committed in more than one attempt.')
+ parser.add_argument(
+ '--use-logs', action='store_true',
+ default=True,
+ help=('On by default. '
+ 'Fetch the detailed logs and recompute the stats in this script, '
+ 'instead of fetching precomputed stats. '
+ 'Slower, but more accurate, and helps validate the cached stats.'))
+ parser.add_argument(
+ '--use-cache',
+ dest='use_logs',
+ action='store_false',
+ help=('Fetch the cached stats from the app. Opposite to --use-logs.'))
+ parser.add_argument(
+ '--date',
+      help='Start date of stats YYYY-MM-DD[ HH[:MM]]. '
+           'Default: one --range ago.')
+ parser.add_argument('--range',
+ choices=INTERVALS.keys(),
+ default='week',
+ help='Time range to print stats for.')
+ infra.libs.logs.add_argparse_options(parser, default_level=logging.ERROR)
+
+ args = parser.parse_args()
+
+ if args.date:
+ args.date = date_from_string(args.date)
+ else:
+ args.date = (datetime.datetime.now() -
+ datetime.timedelta(minutes=INTERVALS[args.range]))
+
+ return args
+
+
+def date_from_string(iso_str):
+  try:
+    return dateutil.parser.parse(iso_str)
+  except ValueError:
+    raise ValueError('Unrecognized date/time format: %s' % iso_str)
+
+
+def date_from_timestamp(timestamp):
+ return datetime.datetime.utcfromtimestamp(float(timestamp))
+
+
+def date_from_git(date_str):
+  """Parse a date from git-log output; return None if missing or invalid."""
+ if not date_str:
+ return None
+ date = None
+ try:
+ date = datetime.datetime.strptime(
+ date_str, '%a %b %d %H:%M:%S %Y')
+ except ValueError:
+ pass
+ return date
+
+
+def local_date_to_timestamp(date):
+ return time.mktime(date.timetuple())
+
+
+def local_to_utc(local_time):
+  timestamp = local_date_to_timestamp(local_time)
+  return datetime.datetime.utcfromtimestamp(timestamp)
+
+
+def fetch_json(url):
+  """Fetch ``url`` and parse the response as JSON, retrying on failure.
+
+  Returns {'error': '404'} if all attempts fail.
+  """
+  result = {'error': '404'}
+  delays = [0.0, 0.5, 1.0, 2.0]  # Seconds to wait before each attempt.
+  last_error = None
+  for retry, delay in enumerate(delays):
+    if delay:
+      time.sleep(delay)
+    try:
+      return json.loads(urllib2.urlopen(url).read())
+    except (IOError, ValueError) as e:
+      last_error = e
+      logging.warning('Failed to fetch (attempt %d) %s: %s', retry + 1, url, e)
+  logging.error('Permanently failed to fetch %s: %s', url, last_error)
+  return result
+
+
+def fetch_tree_status(project, end_date, start_date=None, limit=1000):
+ """Fetch all tree events in the given interval.
+
+ Args:
+ project (str): e.g. 'chromium' or 'blink'.
+    end_date (datetime): end of the time interval, in local time.
+    start_date (datetime): optional start of the interval, in local time.
+ limit (int): max. number of events.
+
+ Returns:
+ List of events {'open': bool, 'date': datetime} sorted by date.
+ """
+ end_timestamp = int(time.mktime(end_date.timetuple()))
+ params = {
+ 'format': 'json',
+ 'limit': limit,
+ # Confusingly, chromium-status app defines the range as
+ # endTime <= t < startTime.
+ 'startTime': end_timestamp,
+ }
+ if start_date:
+ params['endTime'] = int(time.mktime(start_date.timetuple()))
+ query = 'allstatus?' + urllib.urlencode(params)
+ url = urlparse.urljoin(PROJECTS[project]['tree-status'], query)
+ logging.debug('Fetching %s', url)
+ status = fetch_json(url)
+  # Bug in the App Engine app: it may return events outside the time range.
+ def entry(event):
+ date_utc = date_from_string(event['date'])
+ date_local = date_from_timestamp(calendar.timegm(date_utc.utctimetuple()))
+ return {'date': date_local,
+ 'open': event['general_state'] == 'open'}
+ def in_range(entry):
+ logging.debug('in_range(%r)', entry)
+ if entry['date'] >= end_date:
+ return False
+ if not start_date:
+ return True
+ return start_date <= entry['date']
+
+  if not isinstance(status, list):
+    status = []
+ status = [entry(event) for event in status]
+ status = sorted([e for e in status if in_range(e)], key=lambda e: e['date'])
+ return status
+
+
+def fetch_git_page(repo_url, cursor=None, page_size=2000):
+ """Fetch one page worth of logs from gitiles."""
+ params = {
+ 'pretty': 'full',
+ 'format': 'JSON',
+ 'n': page_size,
+ }
+ if cursor:
+ params.update({'s': cursor})
+  url = '%s/+log/master?%s' % (repo_url, urllib.urlencode(params))
+ logging.debug('fetch_git_page: url = %s', url)
+ try:
+ # Strip off the anti-XSS string from the response.
+ response = urllib2.urlopen(url)
+ lines = [l.rstrip() for l in response if l.rstrip() != ")]}'"]
+ raw_data = ''.join(lines)
+ page = json.loads(raw_data)
+ except (IOError, ValueError) as e:
+ page = {}
+ logging.error('Failed to fetch a page: %s', e)
+ return page
+
+
+def fetch_git_logs(repo, from_date, to_date):
+ """Fetch all logs from Gitiles for the given date range.
+
+  Gitiles does not natively support time ranges, so we page through the
+  log until the range is covered, assuming commits are listed in reverse
+  chronological order.
+ """
+ cursor = ''
+ commit_date = to_date
+ data = []
+ while cursor is not None:
+ page = fetch_git_page(repo, cursor)
+ logs = page.get('log', [])
+ cursor = page.get('next')
+ for log in logs:
+ committer = log.get('committer', {})
+ commit_date = date_from_git(committer.get('time'))
+ if not commit_date:
+ continue
+ if commit_date > to_date:
+ continue
+ if commit_date < from_date:
+ break
+ data.append({
+ 'author': log.get('author', {}).get('email'),
+ 'date': commit_date,
+ 'commit-bot': bool('commit-bot' in committer.get('email', '')),
+ 'revision': log.get('commit'),
+ })
+
+ if commit_date < from_date:
+ break
+ return data
+
+
+def fetch_svn_logs(repo, from_date, to_date):
+ from_date = local_to_utc(from_date)
+ to_date = local_to_utc(to_date)
+ range_str = (
+ '{%s +0000}:{%s +0000}' % (from_date, to_date))
+ out = subprocess.check_output(
+ ['svn', 'log', '--with-all-revprops', '--xml', repo, '-r', range_str])
+ data = []
+ for logentry in ElementTree.XML(out).findall('logentry'):
+ date_str = logentry.find('date').text
+ date = date_from_string(date_str)
+ entry = {
+ 'author': logentry.find('author').text,
+ 'date': date,
+ 'revprops': {},
+ 'commit-bot': False,
+ }
+ revprops = logentry.find('revprops')
+ if revprops is not None:
+ for revprop in revprops.findall('property'):
+ entry['revprops'][revprop.attrib['name']] = revprop.text
+ if revprop.attrib['name'] == 'commit-bot':
+ entry['commit-bot'] = True
+ data.append(entry)
+ return data
+
+
+def fetch_stats(args, begin_date=None, stats_range=None):
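+  """Fetch precomputed stats from the chromium-cq-status app.
+
+  Requests two consecutive intervals so the report can compare the
+  requested period with the previous one.
+  """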
+ if not begin_date:
+ begin_date = args.date
+ if not stats_range:
+ stats_range = args.range
+ if begin_date:
+ timestamp = (int(local_date_to_timestamp(begin_date)) +
+ INTERVALS[stats_range] * 60)
+ else:
+ timestamp = int(time.time())
+
+ params = {
+ 'project': args.project,
+ 'interval_minutes': INTERVALS[stats_range],
+ 'end': timestamp,
+ 'count': 2, # Fetch requested and previous set, for comparison.
+ }
+
+ query = 'stats/query?' + urllib.urlencode(params)
+ url = urlparse.urljoin(STATS_URL, query)
+ logging.debug('Fetching %s', url)
+ return fetch_json(url)
+
+
+# "Dangerous default value []": pylint: disable=W0102
+def fetch_cq_logs(start_date=None, end_date=None, filters=[]):
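+  """Fetch raw CQ event records, following the pagination cursor.
+
+  ``filters`` become path components of the query, e.g.
+  ['project=chromium', 'action=patch_stop'].
+  """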
+ begin_time = None
+ end_time = None
+ if start_date:
+ begin_time = int(time.mktime(start_date.timetuple()))
+ if end_date:
+ end_time = int(time.mktime(end_date.timetuple()))
+ results = []
+ cursor = None
+ while True:
+ params = {}
+ if begin_time:
+ params['begin'] = begin_time
+ if end_time:
+ params['end'] = end_time
+ if cursor:
+ params['cursor'] = cursor
+ query = 'query/%s?%s' % ('/'.join(filters), urllib.urlencode(params))
+ url = urlparse.urljoin(STATS_URL, query)
+ logging.debug('Fetching %s', url)
+ data = fetch_json(url)
+ results.extend(data.get('results', []))
+ logging.info('fetch_cq_logs: Got %d results', len(results))
+ logging.debug(' %s', '\n '.join(['%s %s' % (
+ patch_url((r.get('fields', {}).get('issue', 0),
+ r.get('fields', {}).get('patchset', 0))),
+ r.get('fields', {}).get('action', '')) for r in results]))
+ cursor = data.get('cursor', None)
+ if not data.get('more', False) or not cursor:
+ break
+
+ return results
+
+
+def default_stats():
+ """Generate all the required stats fields with default values."""
+ stats = {
+ 'begin': datetime.datetime.now(),
+ 'end': datetime.datetime(1, 1, 1),
+ 'issue-count': 0,
+ 'patchset-count': 0,
+ 'attempt-count': 0,
+ 'patch_stats': {},
+ 'patchset-false-reject-count': 0, # Deprecated stats?
+ 'attempt-reject-count': 0, # Num. of rejected attempts
+ 'attempt-false-reject-count': 0, # Num. of falsely rejected attempts
+ 'false-rejections': [], # patches with falsely rejected attempts
+ 'rejections': [], # patches with rejected attempts
+ 'rejected-patches': set(), # Patches that never committed
+ 'patchset-commit-count': 0,
+ 'patchset-total-commit-queue-durations': derive_list_stats([0]),
+ 'patchset-durations': derive_list_stats([0]),
+ 'patchset-committed-durations': derive_list_stats([0]),
+ 'patchset-attempts': derive_list_stats([0]),
+ 'patchset-committed-attempts': derive_list_stats([0]),
+ 'jobs': {},
+ 'tree': {'open': 0.0, 'total': 0.0},
+ 'usage': {},
+ }
+ for reason in REASONS:
+ stats[reason] = []
+ return stats
+
+
+def organize_stats(stats, latest_init=None, previous_init=None):
+ """Changes cached lists of stats into dictionaries.
+
+ Args:
+ stats (dict): set of stats as returned by chromium-cq-status.
+
+ Returns:
+ result (dict): mapping ('latest' | 'previous') -> stat.name
+ -> <stats json>. If latest/previous_init are given, add to those
+ stats rather than compute them from scratch.
+ """
+ if 'results' not in stats:
+ return None
+ result = {
+ 'latest' : latest_init if latest_init else default_stats(),
+ 'previous': previous_init if previous_init else default_stats(),
+ }
+ for dataset, name in zip(stats['results'], ['latest', 'previous']):
+ result[name]['begin'] = min(
+ date_from_timestamp(dataset['begin']),
+ result[name].get('begin', datetime.datetime.now()))
+ result[name]['end'] = max(date_from_timestamp(dataset['end']),
+ result[name]['end'])
+ re_trybot_pass_count = re.compile('^trybot-(.+)-pass-count$')
+ re_trybot_false_reject_count = re.compile(
+ '^trybot-(.+)-false-reject-count$')
+ assert 'jobs' in result[name] and isinstance(result[name]['jobs'], dict)
+ for data in dataset['stats']:
+ if data['type'] == 'count':
+ result[name][data['name']] = data['count']
+ match_pass = re_trybot_pass_count.match(data['name'])
+ match_false_reject = re_trybot_false_reject_count.match(data['name'])
+ if match_pass:
+ job_name = match_pass.group(1)
+ result[name]['jobs'].setdefault(job_name, {
+ 'pass-count': 0,
+ 'false-reject-count': 0,
+ })
+ result[name]['jobs'][job_name]['pass-count'] += data['count']
+ logging.debug('Job %s passed %d times. Jobs: %r',
+ job_name, data['count'], result[name]['jobs'])
+ if match_false_reject:
+ job_name = match_false_reject.group(1)
+ result[name]['jobs'].setdefault(job_name, {
+ 'pass-count': 0,
+ 'false-reject-count': 0,
+ })
+ result[name]['jobs'][job_name]['false-reject-count'] += data['count']
+ logging.debug('Job %s flakily failed %d times',
+ job_name, data['count'])
+ else:
+ assert data['type'] == 'list'
+ result[name][data['name']] = {
+ '10': data['percentile_10'],
+ '25': data['percentile_25'],
+ '50': data['percentile_50'],
+ '75': data['percentile_75'],
+ '90': data['percentile_90'],
+ '95': data['percentile_95'],
+ '99': data['percentile_99'],
+ 'min': data['min'],
+ 'max': data['max'],
+ 'mean': data['mean'],
+ 'size': data['sample_size'],
+ }
+ return result
+
+
+def derive_list_stats(series):
+ if not series:
+ series = [0]
+ return {
+ '10': numpy.percentile(series, 10),
+ '25': numpy.percentile(series, 25),
+ '50': numpy.percentile(series, 50),
+ '75': numpy.percentile(series, 75),
+ '90': numpy.percentile(series, 90),
+ '95': numpy.percentile(series, 95),
+ '99': numpy.percentile(series, 99),
+ 'min': min(series),
+ 'max': max(series),
+ 'mean': numpy.mean(series),
+ 'size': len(series),
+ }
+
+
+def sort_by_count(elements):
+ return sorted(elements, key=lambda p: p['count'], reverse=True)
+
+
+def stats_by_count_entry(patch_stats, name, patch, reasons):
+ entry = {
+ 'count': patch_stats[name],
+ 'patch_id': patch,
+ 'failed-jobs-details': patch_stats['failed-jobs-details']
+ }
+ for n in reasons:
+ if n in patch_stats:
+ entry[n] = patch_stats[n]
+ assert type(entry[n]) is int, 'Bad type in %s[%s]: %r\nEntry=%r' % (
+ patch, n, entry[n], entry)
+ return entry
+
+
+# "Dangerous default value []": pylint: disable=W0102
+def stats_by_count(patch_stats, name, reasons=[]):
+ return sort_by_count([
+ stats_by_count_entry(patch_stats[p], name, p, reasons)
+ for p in patch_stats if patch_stats[p].get(name)])
+
+
+def _derive_stats_from_patch_stats(stats):
+ patch_stats = stats['patch_stats']
+ stats['attempt-count'] = sum(
+ patch_stats[p]['attempts'] for p in patch_stats)
+ stats['patchset-false-reject-count'] = sum(
+ patch_stats[p]['false-rejections'] for p in patch_stats)
+ stats['attempt-reject-count'] = sum(
+ patch_stats[p]['rejections'] for p in patch_stats)
+ stats['rejected-patches'] = set(
+ p for p in patch_stats if not patch_stats[p]['committed'])
+ stats['false-rejections'] = stats_by_count(
+ patch_stats, 'false-rejections', REASONS)
+ stats['rejections'] = stats_by_count(patch_stats, 'rejections', REASONS)
+ for r in REASONS:
+ stats[r] = stats_by_count(patch_stats, r, set(REASONS) - set([r]))
+
+ stats['patchset-commit-count'] = len([
+ p for p in patch_stats if patch_stats[p]['committed']])
+ stats['patchset-total-commit-queue-durations'] = derive_list_stats([
+ patch_stats[p]['patchset-duration-wallclock'] for p in patch_stats])
+ stats['patchset-durations'] = derive_list_stats([
+ patch_stats[p]['patchset-duration'] for p in patch_stats])
+ stats['patchset-committed-durations'] = derive_list_stats([
+ patch_stats[p]['patchset-duration'] for p in patch_stats
+ if patch_stats[p]['committed']])
+ stats['patchset-attempts'] = derive_list_stats([
+ patch_stats[p]['attempts'] for p in patch_stats])
+ stats['patchset-committed-attempts'] = derive_list_stats([
+ patch_stats[p]['attempts'] for p in patch_stats
+ if patch_stats[p]['committed']])
+
+
+def derive_stats(args, begin_date, init_stats=None):
+ """Process raw CQ updates log and derive stats.
+
+ Fetches raw CQ events and returns the same format as organize_stats().
+  If ``init_stats`` is given, its job stats are preserved and the other
+  stats are replaced.
+ """
+ stats = init_stats or default_stats()
+ filters = ['project=%s' % args.project, 'action=patch_stop']
+ end_date = begin_date + datetime.timedelta(minutes=INTERVALS[args.range])
+ results = fetch_cq_logs(begin_date, end_date, filters=filters)
+ if not results:
+ return stats
+
+ stats['begin'] = date_from_timestamp(results[-1]['timestamp'])
+ stats['end'] = date_from_timestamp(results[0]['timestamp'])
+ # Create map issue:patchset -> #attempts
+ patches, issues = set(), set()
+ for reason in results:
+ issues.add(reason['fields']['issue'])
+ patches.add((reason['fields']['issue'], reason['fields']['patchset']))
+ stats['issue-count'] = len(issues)
+ stats['patchset-count'] = len(patches)
+ patch_stats = {}
+ # Fetch and process each patchset log
+ def get_patch_stats(patch_id):
+ return derive_patch_stats(end_date, patch_id)
+
+ if args.seq or not args.thread_pool:
+ iterable = map(get_patch_stats, patches)
+ else:
+ pool = ThreadPool(min(args.thread_pool, len(patches)))
+ iterable = pool.imap_unordered(get_patch_stats, patches)
+ for patch_id, pstats in iterable:
+ patch_stats[patch_id] = pstats
+
+ stats['patch_stats'] = patch_stats
+ _derive_stats_from_patch_stats(stats)
+ return stats
+
+
+def patch_url(patch_id):
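+  """Return the chromium-cq-status URL for an (issue, patchset) pair.
+
+  e.g. patch_url((123456, 1)) ->
+    'http://chromium-cq-status.appspot.com/patch-status/123456/1'
+  """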
+ return '%s/patch-status/%s/%s' % ((STATS_URL,) + patch_id)
+
+
+def parse_json(obj, return_type=None):
+ """Attempt to interpret a string as JSON.
+
+ Guarantee the return type if given, pass through otherwise.
+ """
+ result = obj
+  if isinstance(obj, basestring):
+ try:
+ result = json.loads(obj)
+ except ValueError as e:
+ logging.error('Could not decode json in "%s": %s', obj, e)
+ # If the type is wrong, return an empty object of the correct type.
+ # In most cases, casting to the required type will not work anyway
+ # (e.g. list to dict).
+ if return_type and type(result) is not return_type:
+ result = return_type()
+ return result
+
+
+def parse_failing_tryjobs(message):
+ """Parse the message to extract failing try jobs."""
+ builders = []
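+  # The first line of the message is a header; each subsequent line is
+  # expected to begin with a failing builder's name.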
+ msg_lines = message.splitlines()
+ for line in msg_lines[1:]:
+ words = line.split(None, 1)
+ if not words:
+ continue
+ builder = words[0]
+ builders.append(builder)
+ return builders
+
+
+def derive_patch_stats(end_date, patch_id):
+ """``patch_id`` is a tuple (issue, patchset)."""
+ results = fetch_cq_logs(end_date=end_date, filters=[
+ 'issue=%s' % patch_id[0], 'patchset=%s' % patch_id[1]])
+  # The results should already be ordered, but sort again just to be sure.
+ results = sorted(results, key=lambda r: r['timestamp'], reverse=True)
+ logging.debug('derive_patch_stats(%r): fetched %d entries.',
+ patch_id, len(results))
+ # Group by attempts
+ attempts = []
+
+ def new_attempt():
+ attempt_empty = {
+ 'id': 0,
+ 'begin': 0.0,
+ 'end': 0.0,
+ 'duration': 0.0,
+ 'actions': [],
+ 'committed': False,
+ 'reason': {},
+ }
+ for reason in REASONS:
+ attempt_empty[reason] = False
+ return attempt_empty
+
+ def add_attempt(attempt, counter):
+ """Create a new attempt from accumulated actions."""
+ assert attempt['actions']
+ attempt['id'] = counter
+ attempt['duration'] = attempt['end'] - attempt['begin']
+ known_reasons = [r for r in KNOWN_REASONS if attempt[r]]
+ if not attempt['committed'] and not known_reasons:
+ attempt['failed-unknown'] = True
+ logging.debug(
+ 'add_attempt: #%d (%s)',
+ attempt['id'],
+ ', '.join([r for r in REASONS if attempt[r]]))
+ attempts.append(attempt)
+
+ # An attempt is a set of actions between patch_start and patch_stop
+ # actions. Repeated patch_start / patch_stop actions are ignored.
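+  # The loop below is a state machine: patch_start switches it to
+  # 'start', and the matching patch_stop closes the attempt after its
+  # message is scanned for the failure reasons defined in REASONS.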
+ attempt = new_attempt()
+ failing_builders = {}
+ state = 'stop'
+ attempt_counter = 0
+ for result in reversed(results):
+ action = result['fields'].get('action')
+ if state == 'stop':
+ if action == 'patch_start':
+ state = 'start'
+ attempt['begin'] = result['timestamp']
+ if state != 'start':
+ continue
+ attempt['actions'].append(result)
+ if action == 'patch_stop':
+ attempt['end'] = result['timestamp']
+ message = result['fields'].get('message', '')
+ if 'CQ bit was unchecked on CL' in message:
+ attempt['manual-cancel'] = True
+ if 'No LGTM' in message:
+ attempt['missing-lgtm'] = True
+ if 'A disapproval has been posted' in message:
+ attempt['not-lgtm'] = True
+ if 'Transient error: Invalid delimiter' in message:
+ attempt['invalid-delimiter'] = True
+ if 'Failed to commit' in message:
+ attempt['failed-commit'] = True
+      if ('Failed to apply patch' in message or
+          'Failed to apply the patch' in message):
+ attempt['failed-patch'] = True
+ if 'Presubmit check' in message:
+ attempt['failed-presubmit-check'] = True
+ if 'Try jobs failed' in message:
+ if 'presubmit' in message:
+ attempt['failed-presubmit-bot'] = True
+ else:
+ attempt['failed-jobs'] = message
+ builders = parse_failing_tryjobs(message)
+ for b in builders:
+ failing_builders.setdefault(b, 0)
+ failing_builders[b] += 1
+ if 'Exceeded time limit waiting for builds to trigger' in message:
+ attempt['failed-to-trigger'] = True
+ attempt_counter += 1
+ add_attempt(attempt, attempt_counter)
+ attempt = new_attempt()
+ state = 'stop'
+ continue
+ if action == 'patch_committed':
+ attempt['committed'] = True
+ # TODO(sergeyberezin): enable this after this action is stable in CQ.
+ if action == 'patch_failed':
+ attempt['reason'] = parse_json(
+ result['fields'].get('reason', {}), return_type=dict)
+ logging.info('Attempt reason: %r', attempt['reason'])
+ if attempt['reason'].get('fail_type') == 'reviewer_lgtm':
+ attempt['missing-lgtm'] = True
+ if attempt['reason'].get('fail_type') == 'commit':
+ attempt['failed-commit'] = True
+ if attempt['reason'].get('fail_type') == 'simple try job':
+ failed_jobs = parse_json(attempt['reason'].get(
+ 'fail_details', [('unknown_master', 'unknown_bot')]))
+        # Remove the presubmit bot - it's accounted for separately.
+        failed_jobs = [j for j in failed_jobs if 'presubmit' not in j[1]]
+ attempt['failed-jobs'] = failed_jobs
+
+ stats = {}
+ committed_set = set(a['id'] for a in attempts if a['committed'])
+ stats['committed'] = len(committed_set)
+ stats['attempts'] = len(attempts)
+ stats['rejections'] = stats['attempts'] - stats['committed']
+
+ logging.info('derive_patch_stats: %s has %d attempts, committed=%d',
+ patch_url(patch_id), len(attempts), stats['committed'])
+
+ valid_reasons_set = set()
+ for reason in VALID_REASONS:
+ s = set(a['id'] for a in attempts if a[reason])
+ stats[reason] = len(s)
+ valid_reasons_set.update(s)
+ for reason in set(REASONS) - set(VALID_REASONS):
+ stats[reason] = len(set(a['id'] for a in attempts if a[reason]))
+
+ # Populate failed builders.
+ stats['failed-jobs-details'] = failing_builders
+
+ stats['false-rejections'] = 0
+ if stats['committed']:
+ stats['false-rejections'] = len(
+ set(a['id'] for a in attempts) - committed_set - valid_reasons_set)
+ # Sum of attempt duration.
+ stats['patchset-duration'] = sum(a['duration'] for a in attempts)
+ if attempts:
+ stats['patchset-duration-wallclock'] = (
+ attempts[-1]['end'] - attempts[0]['begin'])
+ else:
+ stats['patchset-duration-wallclock'] = 0.0
+ return patch_id, stats
+
+
+def derive_tree_stats(project, start_date, end_date):
+  """Fetch tree status events and derive tree open/closed time stats."""
+ # Fetch one more event right before the range, so we know the
+ # initial tree status.
+ status = (fetch_tree_status(project, end_date=start_date, limit=1) +
+ fetch_tree_status(project, end_date, start_date))
+ stats = {'open': 0.0, 'total': (end_date - start_date).total_seconds()}
+ if not status:
+ return stats
+
+ logging.debug('Tree status:\n%s', '\n'.join([' %r' % e for e in status]))
+
+ is_open = status[0]['open']
+ curr_date = start_date
+ for event in status[1:]:
+ delta = event['date'] - curr_date
+ if is_open and not event['open']:
+ stats['open'] += delta.total_seconds()
+ logging.debug('Tree was open from %s to %s for %s (total of %s)',
+ curr_date, event['date'],
+ delta, datetime.timedelta(seconds=stats['open']))
+ if not is_open:
+ curr_date = event['date']
+ is_open = event['open']
+
+ # Account for the remaining time after the last event.
+ if is_open:
+ delta = end_date - curr_date
+ stats['open'] += delta.total_seconds()
+ logging.debug('Tree was open from %s to %s for %s (total of %s)',
+ curr_date, end_date,
+ delta, datetime.timedelta(seconds=stats['open']))
+ return stats
+
+
+def derive_log_stats(log_data, bots):
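+  """Derive commit and committer counts from a list of commit entries.
+
+  Each entry is a dict with at least 'author' and 'commit-bot' keys, as
+  produced by fetch_git_logs() and fetch_svn_logs().
+  """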
+ # Calculate stats.
+ cq_commits = [v for v in log_data if v['commit-bot']]
+ users = {}
+ for commit in cq_commits:
+ users[commit['author']] = users.get(commit['author'], 0) + 1
+ committers = {}
+ manual_committers = {}
+ bot_committers = {}
+ bot_manual_committers = {}
+ for commit in log_data:
+ committers[commit['author']] = committers.get(commit['author'], 0) + 1
+ if not commit['commit-bot']:
+ manual_committers[commit['author']] = manual_committers.get(
+ commit['author'], 0) + 1
+ if commit['author'] in bots:
+ bot_committers[commit['author']] = bot_committers.get(
+ commit['author'], 0) + 1
+ if not commit['commit-bot']:
+ bot_manual_committers[commit['author']] = bot_manual_committers.get(
+ commit['author'], 0) + 1
+
+ stats = {}
+ stats['cq_commits'] = len(cq_commits)
+ stats['total_commits'] = len(log_data)
+ stats['users'] = len(users)
+ stats['committers'] = len(committers)
+ stats['manual_committers'] = len(manual_committers)
+  stats['manual_commits'] = sum(manual_committers.itervalues())
+  stats['bot_committers'] = len(bot_committers)
+  stats['bot_commits'] = sum(bot_committers.itervalues())
+  stats['bot_manual_commits'] = sum(bot_manual_committers.itervalues())
+ stats['manual_only_committers'] = {
+ a: c for a, c in committers.iteritems()
+ if c == manual_committers.get(a, 0)}
+ return stats
+
+
+def derive_git_stats(project, start_date, end_date, bots):
+ log_data = fetch_git_logs(PROJECTS[project]['repo'], start_date, end_date)
+ return derive_log_stats(log_data, bots)
+
+
+def derive_svn_stats(project, start_date, end_date, bots):
+ log_data = fetch_svn_logs(PROJECTS[project]['repo'], start_date, end_date)
+ return derive_log_stats(log_data, bots)
+
+
+def percentage_tuple(data, total):
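+  """Return (num_data, num_total, percent).
+
+  Accepts raw numbers as well as sized collections, e.g.
+  percentage_tuple([1, 2], 8) == (2, 8, 25.0).
+  """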
+ num_data = data if isinstance(data, numbers.Number) else len(data)
+ num_total = total if isinstance(total, numbers.Number) else len(total)
+ percent = 100. * num_data / num_total if num_total else 0.
+ return num_data, num_total, percent
+
+
+def percentage(data, total):
+ return percentage_tuple(data, total)[2]
+
+
+def round_timedelta(seconds):
+ # We never care about the milliseconds when printing timedeltas:
+ return datetime.timedelta(seconds=round(seconds))
+
+
+def output(fmt='', *args):
+ """An equivalent of print to mock out in testing."""
+ print fmt % args
+
+
+def print_attempt_counts(stats, name, message, item_name='',
+ details=False, committed=None, indent=0,
+ print_zero=False):
+ """Print a summary of a ``name`` slice of attempts.
+
+  ``committed``: None=print all, True=only committed patches,
+  False=only rejected patches.
+
+  ``print_zero``: print stats even if no attempts match.
+  """
+ if not item_name:
+ item_name = message
+ patches = [
+ p for p in stats[name]
+ if committed is None or
+ bool(stats['patch_stats'][p['patch_id']]['committed']) is committed]
+ count = sum(p['count'] for p in patches)
+ if not print_zero and not count:
+ return
+
+ failing_builders = {}
+ for p in patches:
+ for b, cnt in p['failed-jobs-details'].iteritems():
+ failing_builders.setdefault(b, {})
+ failing_builders[b][p['patch_id']] = cnt
+
+  indent_str = ' ' * indent
+ if message.startswith('failed jobs'):
+ output(
+ '%s%4d attempt%s (%.1f%% of %d attempts) %s: %d in %d%s patches',
+ indent_str, count, ' ' if count == 1 else 's',
+ percentage(count, stats['attempt-count']),
+ stats['attempt-count'],
+ message,
+ sum(sum(d.values()) for d in failing_builders.values()),
+ len(patches),
+ {True: ' committed', False: ' uncommitted'}.get(committed, ''))
+ else:
+ output(
+ '%s%4d attempt%s (%.1f%% of %d attempts) %s in %d%s patches',
+ indent_str, count, ' ' if count == 1 else 's',
+ percentage(count, stats['attempt-count']),
+ stats['attempt-count'],
+ message,
+ len(patches),
+ {True: ' committed', False: ' uncommitted'}.get(committed, ''))
+ if details:
+ lines = []
+ for p in patches:
+ line = ' %d %s %s' % (
+ p['count'], item_name, patch_url(p['patch_id']))
+ causes = ['%d %s' % (p['failed-jobs-details'][k], k)
+ for k in p['failed-jobs-details']]
+ line += ' (%s)' % ', '.join(causes)
+ lines.append(line)
+ output('\n'.join(lines))
+ output()
+
+
+def print_duration(name, args, latest, previous, print_name=None):
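+  """Print wallclock and sum-of-attempts durations for a stats key.
+
+  ``name`` is 'mean' or a percentile key (e.g. '50') as produced by
+  derive_list_stats() / organize_stats().
+  """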
+ if not print_name:
+ print_name = name.capitalize()
+ cq_only = round_timedelta(latest['patchset-durations'][name])
+ wallclock = round_timedelta(
+ latest['patchset-total-commit-queue-durations'][name])
+ prev_cq_only = 'unknown'
+ prev_cq_only_s = 'unknown'
+ prev_wallclock = 'unknown'
+ prev_wallclock_s = 'unknown'
+ if previous:
+ prev_cq_only = round_timedelta(previous['patchset-durations'][name])
+ prev_cq_only_s = int(prev_cq_only.total_seconds() / 60)
+ prev_wallclock = round_timedelta(
+ previous['patchset-total-commit-queue-durations'][name])
+ prev_wallclock_s = int(prev_wallclock.total_seconds() / 60)
+ output('\n%s duration in CQ trying a patch:', print_name)
+ output(
+ ' wallclock: %8s (%3d min). Prev. %s: %8s (%3s min).',
+ wallclock, round(wallclock.total_seconds() / 60.0), args.range,
+ prev_wallclock, prev_wallclock_s)
+ output(
+ ' sum of attempts: %8s (%3d min). Prev. %s: %8s (%3s min).',
+ cq_only, round(cq_only.total_seconds() / 60.0), args.range,
+ prev_cq_only, prev_cq_only_s)
+
+
+def print_usage(args, latest, previous):
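+  """Print CQ usage: how many committers and commits went through CQ.
+
+  Bot activity and manual commits are broken out separately.
+  """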
+ if not latest['usage']:
+ return
+ output()
+ output(
+ 'CQ users: %6d out of %6d total committers %6.2f%%',
+ latest['usage']['users'], latest['usage']['committers'],
+ percentage(latest['usage']['users'], latest['usage']['committers']))
+ fmt_str = (
+ ' Committed %6d out of %6d commits %6.2f%%. '
+ 'Prev. %s: %6.2f%%.')
+ data = percentage_tuple(latest['usage']['cq_commits'],
+ latest['usage']['total_commits'])
+ data += (args.range, percentage(previous['usage']['cq_commits'],
+ previous['usage']['total_commits']))
+ if latest['usage']['bot_manual_commits']:
+ fmt_str += ' (%6.2f%% by humans)'
+ data += (percentage(latest['usage']['cq_commits'],
+ latest['usage']['total_commits'] -
+ latest['usage']['bot_manual_commits']),)
+ output(fmt_str, *data)
+
+ output()
+ output('Bots: %6d out of %6d total committers %6.2f%%',
+ *percentage_tuple(latest['usage']['bot_committers'],
+ latest['usage']['committers']))
+ output(' Committed by CQ %6d out of %6d commits %6.2f%%',
+ *percentage_tuple(latest['usage']['bot_commits'],
+ latest['usage']['total_commits']))
+ output(' Committed directly %6d out of %6d commits %6.2f%%',
+ *percentage_tuple(latest['usage']['bot_manual_commits'],
+ latest['usage']['total_commits']))
+ output()
+ output('Manual committers: %6d out of all %6d users %6.2f%%',
+ *percentage_tuple(latest['usage']['manual_committers'],
+ latest['usage']['committers']))
+ output(' Committed %6d out of %6d commits %6.2f%%',
+ *percentage_tuple(latest['usage']['manual_commits'],
+ latest['usage']['total_commits']))
+
+
+def print_tree_status(args, latest, previous):
+ output()
+ output(
+ 'Total time tree open: %.1f hours of %.1f hours (%.2f%%). '
+ 'Prev. %s: %.2f%%.',
+ latest['tree']['open'] / 3600.0,
+ latest['tree']['total'] / 3600.0,
+ percentage(latest['tree']['open'], latest['tree']['total']),
+ args.range,
+ percentage(previous['tree']['open'], previous['tree']['total']))
+
+
+def print_stats(args, stats):
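+  """Print the full report, comparing the latest and previous ranges."""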
+ latest = stats.get('latest')
+ previous = stats.get('previous')
+ if not latest:
+ output('No stats to display.')
+ return
+ if not previous:
+ output('No previous %s stats loaded.', args.range)
+ return
+ output('Statistics for project %s', args.project)
+ if latest['begin'] > latest['end']:
+ output(' No stats since %s', args.date)
+ return
+
+ output('from %s till %s (local time).',
+ latest['begin'], latest['end'])
+
+ print_usage(args, latest, previous)
+ print_tree_status(args, latest, previous)
+
+ output()
+ output(
+ '%4d issues (%d patches) were tried by CQ, '
+ 'resulting in %d attempts.',
+ latest['issue-count'], latest['patchset-count'], latest['attempt-count'])
+ output(
+ '%4d patches (%.1f%% of tried patches, %.1f%% of attempts) '
+      'were committed by CQ.',
+ latest['patchset-commit-count'],
+ percentage(latest['patchset-commit-count'], latest['patchset-count']),
+ percentage(latest['patchset-commit-count'], latest['attempt-count']))
+
+ output()
+ output('Rejections:')
+ print_attempt_counts(latest, 'rejections', 'were unsuccessful',
+ item_name='failures',
+ committed=False)
+ output(' This includes:')
+ for reason in REASONS:
+ print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
+ details=args.list_rejections,
+ item_name=REASONS[reason]['item'], committed=False)
+
+ # TODO(sergeyberezin): add gave up count (committed manually after trying CQ).
+ # TODO(sergeyberezin): add count of NOTRY=true (if possible).
+
+ output()
+ output('False Rejections:')
+ if args.use_logs:
+ print_attempt_counts(latest, 'false-rejections', 'were false rejections',
+ item_name='flakes', committed=True)
+ else:
+ output(
+ ' %4d attempts (%.1f%% of %d attempts) were false rejections',
+ latest['attempt-false-reject-count'],
+ percentage(latest['attempt-false-reject-count'],
+ latest['attempt-count']),
+ latest['attempt-count'])
+
+ output(' False rejections include:')
+ for reason in FLAKY_REASONS.keys() + ['failed-unknown']:
+ print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
+ item_name=REASONS[reason]['item'], committed=True,
+ details=args.list_false_rejections)
+
+ output(' Other rejections in committed patches for valid reasons:')
+ for reason in VALID_REASONS.keys():
+ print_attempt_counts(latest, reason, REASONS[reason]['message'], indent=2,
+ item_name=REASONS[reason]['item'], committed=True,
+ details=args.list_false_rejections)
+
+ print_duration('mean', args, latest, previous)
+ print_duration('50', args, latest, previous, 'Median')
+
+ output()
+  output('Percentiles for patches that eventually land:')
+ for p in ['10', '25', '50', '75', '90', '95', '99']:
+ output('%s: %4.1f hrs, %2d attempts',
+ p, latest['patchset-committed-durations'][p] / 3600.0,
+ latest['patchset-committed-attempts'][p])
+
+ # TODO(sergeyberezin): add total try jobs / by CQ / unknown. Get it from CBE.
+ # TODO(sergeyberezin): recompute bot flakiness from CBE. CQ does not
+ # have enough info.
+ output()
+ output('Top flaky builders (which fail and succeed in the same patch):')
+
+ logging.debug('Found %d jobs', len(latest['jobs'].keys()))
+
+ def flakiness(stats, job):
+ passes = stats['jobs'][job]['pass-count']
+ failures = stats['jobs'][job]['false-reject-count']
+ return percentage(failures, passes + failures)
+
+ jobs = sorted(latest['jobs'].iterkeys(), key=lambda j: flakiness(latest, j),
+ reverse=True)
+ output('%-40s %-15s %-15s %-15s',
+ 'Builder Name', 'Succeeded', 'Flaky Failures', 'Flakiness (%)')
+ for job in jobs:
+ passes = latest['jobs'][job]['pass-count']
+ failures = latest['jobs'][job]['false-reject-count']
+ output('%-40s %-15s %-15s %-15s',
+ job, '%5d' % passes, '%5d' % failures,
+ '%6.2f%%' % flakiness(latest, job))
+
+
+def acquire_stats(args):
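+  """Assemble 'latest' and 'previous' stats, from raw logs or the cache.
+
+  With --use-logs, job flakiness for week/day ranges is seeded from the
+  app's cached daily/hourly stats before recomputing from raw CQ logs.
+  """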
+ stats = {}
+ logging.info('Acquiring stats for project %s for a %s of %s using %s',
+ args.project, args.range, args.date,
+ 'logs' if args.use_logs else 'cache')
+ end_date = args.date + datetime.timedelta(minutes=INTERVALS[args.range])
+ prev_date = args.date - datetime.timedelta(minutes=INTERVALS[args.range])
+ if args.use_logs:
+ init_stats = {'latest': default_stats(), 'previous': default_stats()}
+ assert args.date
+ # For weekly stats, collect job flakiness from daily cached stats.
+ if args.range == 'week':
+ for day in range(7):
+ d = args.date + datetime.timedelta(minutes=INTERVALS['day']*day)
+ raw_stats = fetch_stats(args, d, 'day')
+ init_stats = organize_stats(raw_stats, latest_init=init_stats['latest'])
+ elif args.range == 'day':
+ for hour in range(24):
+ d = args.date + datetime.timedelta(minutes=INTERVALS['hour']*hour)
+ raw_stats = fetch_stats(args, d, 'hour')
+ init_stats = organize_stats(raw_stats, latest_init=init_stats['latest'])
+ else:
+ init_stats = organize_stats(fetch_stats(args))
+ stats['latest'] = derive_stats(
+ args, args.date, init_stats=init_stats['latest'])
+ stats['previous'] = derive_stats(args, prev_date)
+ else:
+ stats = organize_stats(fetch_stats(args))
+
+ stats['latest']['tree'] = derive_tree_stats(args.project, args.date, end_date)
+ stats['previous']['tree'] = derive_tree_stats(
+ args.project, prev_date, args.date)
+
+ if PROJECTS[args.project]['type'] == 'git':
+ stats['latest']['usage'] = derive_git_stats(
+ args.project, args.date, end_date, args.bots)
+ stats['previous']['usage'] = derive_git_stats(
+ args.project, prev_date, args.date, args.bots)
+ else:
+ stats['latest']['usage'] = derive_svn_stats(
+ args.project, args.date, end_date, args.bots)
+ stats['previous']['usage'] = derive_svn_stats(
+ args.project, prev_date, args.date, args.bots)
+
+ return stats
+
+
+def main():
+ args = parse_args()
+ logger = logging.getLogger()
+ # TODO(sergeyberezin): how do I derive local timezone string?
+ # Need to be able to pass dateutil.tz.tzlocal() directly.
+ infra.libs.logs.process_argparse_options(args, logger)
+ stats = acquire_stats(args)
+ print_stats(args, stats)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
« no previous file with comments | « infra/tools/cq_stats/__main__.py ('k') | infra/tools/cq_stats/test/__init__.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698