Chromium Code Reviews

Side by Side Diff: commit-queue/tools/stats.py

Issue 135363007: Delete public commit queue to avoid confusion after move to internal repo (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/
Patch Set: Created 6 years, 10 months ago
#!/usr/bin/python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Generate stats of CQ usage.'''

# sudo apt-get install python-pip; sudo pip install requests requests_cache
import argparse
import collections
from datetime import timedelta, datetime
import itertools
import numpy
import operator
import re
import requests_cache
import time
import sys


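# Wrap stdout so progress output is flushed as it is written; without this,
# output from the long-running fetch loop below only appears in large chunks
# when piped.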
class Unbuffered:
  def __init__(self, stream):
    self.stream = stream
  def write(self, data):
    self.stream.write(data)
    self.stream.flush()
  def __getattr__(self, attr):
    return getattr(self.stream, attr)
sys.stdout = Unbuffered(sys.stdout)


CONFIG = {
  'chrome': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/chrome/trunk/src'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/chrome/trunk/src'),
    ],
  },
  'blink': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/blink/(?:trunk/?)?$'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/blink/(?:trunk/?)?$'),
    ],
  },
  'skia': {
    'target_base_url_regexps': [
      re.compile(r'^http://skia\.googlecode\.com/svn/trunk'),
      re.compile(r'^https://skia\.googlecode\.com/svn/trunk'),
    ],
  },
}
config_name = None


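# requests_cache invokes response hooks for cache hits as well as for real
# network fetches, so the hook below sleeps only when the response did not
# come from the cache; fully-cached re-runs stay fast.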
def make_throttle_hook(timeout=1.0):
  # pylint: disable=W0613
  def hook(response, *args, **kwargs):
    print response.url
    if not getattr(response, 'from_cache', False):
      print 'sleeping'
      time.sleep(timeout)
    return response
  return hook


# It seems reasonable to expect that rietveld can handle a load of 20 queries
# per second from this script. Ask someone on the infra team before reducing
# this value.
seconds_between_requests = 0.05

requests_cache.install_cache('cq')
session = requests_cache.CachedSession()
# if args.clear:
#   print 'Clearing cache...'
#   session.cache.clear()
session.hooks = {'response': make_throttle_hook(seconds_between_requests)}


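# Rietveld's /search endpoint pages its results: each response includes an
# opaque cursor, and passing that cursor back fetches the next page.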
def fetch_recent_issues(limit, args, cursor=None):
  search_url = ('https://codereview.chromium.org/search?format=json&'
                'limit=%d') % limit
  if cursor:
    search_url += '&cursor=%s' % cursor
  if args.modified_after:
    search_url += '&modified_after=%s' % args.modified_after
  if args.modified_before:
    search_url += '&modified_before=%s' % args.modified_before
  response = session.get(search_url)
  return response.json()


def get_target_base_url_regexps():
  return CONFIG[config_name]['target_base_url_regexps']


def matches_target_base_url(url):
  for regexp in get_target_base_url_regexps():
    match = regexp.match(url)
    if match:
      return True
  return False


def issue_patch_string(issue_id, patch_id):
  return '%s:%s' % (issue_id, patch_id)


def issue_display_url(issue_id):
  return 'https://codereview.chromium.org/%s' % issue_id


def issue_url(issue_id):
  return 'https://codereview.chromium.org/api/%s?messages=true' % issue_id


def date_from_iso_str(iso_str):
  # Issue timestamps come in one of two formats:
  # 2013-10-17 16:43:04.391480
  # 2013-10-17 16:43:04
  try:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S.%f')
  except ValueError:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S')


def round_timedelta(delta):
  # We never care about the milliseconds when printing timedeltas:
  return timedelta(seconds=round(delta.total_seconds()))


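# _url_to_key and get_response_and_time are requests_cache internals (hence
# the pylint protected-access disable); they recover when a cached response
# was originally fetched, which fetch_issue reports as its notion of "now".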
def fetch_issue(issue_id):
  url = issue_url(issue_id)
  this_json = session.get(url).json()
  # pylint: disable=W0212
  cache_key = session.cache._url_to_key(url)
  _, now = session.cache.get_response_and_time(cache_key)
  return this_json, now


def format_line(values, aligns, widths):
  values = map(str, values)
  args = zip(values, aligns, widths)
  return ' '.join([align(value, width)
                   for value, align, width in args])


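# A CQAttempt records a single commit-queue run on one patchset: the result,
# the patch and issue it ran on, who started it, and when it started and
# ended.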
class CQAttempt(object):
  def __init__(self):
    self.result = None
    self.patch_id = None
    self.issue_id = None
    self.author = None
    self.start_time = None
    self.end_time = None

  @property
  def issue_patch_string(self):
    return issue_patch_string(self.issue_id, self.patch_id)

  @property
  def duration(self):
    return self.end_time - self.start_time


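# CQAnalyzer is a small state machine. It replays an issue's messages in
# order, treating commit-bot messages as attempt start/end events, and
# collects one CQAttempt per start/end pair.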
class CQAnalyzer(object):
  def __init__(self):
    self.state = None
    self.issue = None
    self.issue_id = None
    self.author = None
    self.patch_id = None
    self.start_time = None
    self.attempts = []
    self.manual_commits = set()
    self.no_try_issues = set()
    self.now = None
    # issue_id -> time since CQ start
    self.in_progress = {}

  # pylint: disable=C0301
  start_regexp = re.compile('''CQ is trying da patch. Follow status at
https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)''')
  committed_regexp = re.compile('Change committed as (?P<revision>\d+)')
  # pylint: disable=C0301
  manual_commit_regexp = re.compile('Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)')

  def _reset(self):
    self.state = None
    self.author = None
    self.patch_id = None
    self.start_time = None

  def _start(self, patch_id, start_time, author=None):
    if self.state == 'START' and self.patch_id == patch_id:
      # print 'Restarted for patchset %s on %s' % (
      #     patch_id, issue_display_url(self.issue_id))
      self._end('RESTART', start_time)
    self.state = 'START'
    self.patch_id = patch_id
    self.author = author
    assert start_time
    self.start_time = start_time

  def _end(self, result, end_time):
    if self.state != 'START':
      print 'ERROR: %s for %s without start??' % (
          result, issue_display_url(self.issue_id))
      self._reset()
      return
    attempt = CQAttempt()
    attempt.result = result
    attempt.patch_id = self.patch_id
    attempt.issue_id = self.issue_id
    attempt.start_time = self.start_time
    attempt.end_time = end_time
    self.attempts.append(attempt)
    self._reset()

  def _start_and_end(self, result, date):
    self._start(result, date)
    self._end(result, date)

  def _manual_commit(self, patch_id):
    self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))

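  # Manual commits are reported by the committer, not commit-bot, so they
  # are matched below before the commit-bot sender filter.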
  def process_message(self, message):
    text = message['text']
    date = date_from_iso_str(message['date'])

    match = self.manual_commit_regexp.match(text)
    if match:
      if self.start_time:
        self._end('MANUAL_COMMIT',
                  date_from_iso_str(self.issue['modified']))
      patch_number = int(match.group('patch_id'))
      # gcl dcommit talks in terms of patch numbers 1-5; we need to map
      # those to patchset ids:
      if len(self.issue['patchsets']) > (patch_number - 1):
        patch_id = self.issue['patchsets'][patch_number - 1]
      else:
        print '%s missing patch number %d' % (
            self.issue_id, patch_number)
        print self.issue['patchsets']
        print 'issue, now = fetch_issue(\'%s\')' % self.issue_id
        patch_id = self.issue['patchsets'][-1]
      self._manual_commit(patch_id)
      return

    if message['sender'] != 'commit-bot@chromium.org':
      return

    match = self.start_regexp.match(text)
    if match:
      self._start(match.group('patch_id'), date, match.group('author'))
      return
    match = self.committed_regexp.match(text)
    if match:
      self._end('SUCCESS', date)
      return

    cq_end_messages = {
      'TRY': 'Retried try job',
      'APPLY': 'Failed to apply patch',
      'APPLY2': 'Failed to apply the patch',
      'COMPILE': 'Sorry for I got bad news for ya',
      'DESCRIPTION_CHANGED': ('Commit queue rejected this change because '
                              'the description'),
      # This is too conservative.
      'REVIEWERS_CHANGED': 'List of reviewers changed.',
      # User caused.
      'PATCH_CHANGED': 'Commit queue failed due to new patchset.',
      # FAILED_TO_TRIGGER is a very serious failure; unclear why it
      # happens!
      'FAILED_TO_TRIGGER': 'Failed to trigger a try job on',
      # BINARY_FILE is just a bug in the CQ.
      'BINARY_FILE': 'Can\'t process patch for file',
      'BINARY_FILE2': 'Failed to request the patch to try',
      # Unclear why UPDATE_STEP happens. Likely broken bots; it shouldn't
      # fail patches!
      'UPDATE_STEP': 'Step "update" is always a major failure.',
      'BERSERK': 'The commit queue went berserk',
      'INTERNAL_ERROR': 'Commit queue had an internal error.',
    }
    for result, message_text in cq_end_messages.items():
      if text.startswith(message_text):
        self._end(result, date)
        return

    cq_fail_to_start_messages = {
      'FAILED_PATCHSET': 'Failed to get patchset properties',
      'NO_COMMENTS': 'No comments yet.',
      'NO_LGTM': 'No LGTM from a valid reviewer yet.',
    }
    for result, message_text in cq_fail_to_start_messages.items():
      if text.startswith(message_text):
        self._start_and_end(result, date)
        return

    print 'ERROR: %s has unknown message: %s' % (
        issue_display_url(self.issue_id), text)
    self._end('UNKNOWN', date)

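  # start_issue()/end_issue() bracket one issue's message replay. end_issue()
  # decides what to do with an attempt that is still open once the messages
  # run out: in-progress if the commit box is still checked, ignored if not.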
  def start_issue(self, issue, now):
    # We may be dealing with cached data, so cache our concept of "now".
    self.now = now
    self.issue = issue
    self.issue_id = issue['issue']

  def end_issue(self, issue):
    issue_id = issue['issue']
    assert issue_id == self.issue_id
    if self.start_time:
      if self.issue['commit']:
        print '%s in progress? %s' % (
            round_timedelta(self.now - self.start_time),
            issue_display_url(self.issue_id))
        self._end('IN_PROGRESS', self.now)
      else:
        # There are various reasons why the commit box may have been
        # unchecked, including that the issue may be closed now. If those
        # are not from a user, then that's a bug in rietveld and out of
        # scope for this script. For now we're just ignoring cases where
        # the box was unchecked. We could log an _end() event, but which
        # time would we use? last_modified has at best a weak correlation
        # with the issue close or CQ checkbox.
        self._reset()
    # _reset() shouldn't be needed here, but just in case.
    self._reset()
    self.issue_id = None
    self.issue = None

  def process_issue(self, issue, now):
    issue_id = issue['issue']

    if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
      self.no_try_issues.add(issue_id)

    self.start_issue(issue, now)
    for message in issue['messages']:
      self.process_message(message)
      # print '%(date)s: %(text)s' % message
    self.end_issue(issue)

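  # Prints a fixed-width table of attempts grouped by result, with the mean
  # and median attempt duration for each result.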
  def print_attempts_table(self):
    result_getter = operator.attrgetter('result')
    attempts = sorted(self.attempts, key=result_getter)

    col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
    col_widths = (5, 20, 16, 16)
    col_headers = ('count', 'result', 'mean duration', 'median duration')
    print
    print ('CQ Attempts by result (there can be more than '
           'one attempt on the same patch)')
    print format_line(col_headers, col_aligns, col_widths)
    print '-' * (sum(col_widths) + len(col_widths) - 1)
    for result, group_itr in itertools.groupby(attempts, result_getter):
      group = list(group_itr)
      durations = map(operator.attrgetter('duration'), group)
      mean = sum(durations, timedelta()) / len(group)
      durations_seconds = map(timedelta.total_seconds, durations)
      median_seconds = numpy.median(durations_seconds)
      median = timedelta(seconds=round(median_seconds))
      values = (len(group), result, round_timedelta(mean), median)
      print format_line(values, col_aligns, col_widths)
    print


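# Example invocations (all flags are defined just below):
#   ./stats.py --chrome --count 2000 --progress
#   ./stats.py --modified-after 2013-10-01 --modified-before 2013-11-01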
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--base-url-stats', action='store_true',
                      help='print base_url matching stats')
  parser.add_argument('--chrome', action='store_true')
  parser.add_argument('--clear', action='store_true',
                      help='clear the cache before starting')
  parser.add_argument('--count', type=int, default=None,
                      help='number of issues to fetch')
  parser.add_argument('--modified-after', default=None,
                      help=('search for issues modified after '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--modified-before', default=None,
                      help=('search for issues modified before '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--progress', help='show progress', action='store_true')
  parser.add_argument('--skia', action='store_true')
  args = parser.parse_args()

  # Rietveld allows up to 1000 results per search query. I don't know if
  # that's better for the server or not. We'll use 100 to be safe.
  per_request_search_limit = 100
  if args.count:
    per_request_search_limit = min(int(args.count / 2), 100)
  # Cap the number of issues we analyze at args.count, when given.
  target_count = args.count
  if args.clear:
    print 'Clearing cache...'
    session.cache.clear()
  global config_name
  if args.chrome:
    config_name = 'chrome'
  elif args.skia:
    config_name = 'skia'
  else:
    config_name = 'blink'
  print 'Fetching and analyzing most recent issues...'
  analyzer = CQAnalyzer()

  total_fetch_count = 0
  issue_count = 0
  cursor = None
  url_counts = collections.Counter()
  while True:
    search_results = fetch_recent_issues(per_request_search_limit, args,
                                         cursor)
    if len(search_results['results']) == 0:
      break
    cursor = search_results['cursor']
    total_fetch_count += per_request_search_limit

    if args.base_url_stats:
      url_counts.update([result['base_url']
                         for result in search_results['results']])

    for result in search_results['results']:
      if not matches_target_base_url(result['base_url']):
        continue
      issue_count += 1
      if args.progress and (issue_count % 100 == 0):
        print issue_count,
        if target_count:
          print target_count
      issue, now = fetch_issue(result['issue'])
      analyzer.process_issue(issue, now)
    if target_count and issue_count >= target_count:
      break

  if args.base_url_stats:
    print
    for url, count in url_counts.most_common():
      matches_target = matches_target_base_url(url)
      print '%s %s %s' % (matches_target, count, url)

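  # Note: the percentage figures below use Python 2 integer division, so
  # they truncate toward zero.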
  print
  print 'Found %s issues (in the %s most recent issues).' % (
      issue_count, total_fetch_count)
  cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
  cq_issue_count = len(cq_issues)
  cq_patches = set([attempt.issue_patch_string
                    for attempt in analyzer.attempts])
  cq_patch_count = len(cq_patches)
  print ('%s issues (%d%%) (%d patches) tried the CQ, '
         'resulting in %d CQ attempts.') % (
             cq_issue_count, 100 * cq_issue_count / issue_count,
             cq_patch_count, len(analyzer.attempts))

  no_try_issues = len(cq_issues & analyzer.no_try_issues)
  gave_up_patches = len(cq_patches & analyzer.manual_commits)
  print ('%s of those (%d%%) were committed manually (gave up). '
         '%s (%d%%) were marked NOTRY.') % (
             gave_up_patches, 100 * gave_up_patches / cq_issue_count,
             no_try_issues, 100 * no_try_issues / cq_issue_count)

  analyzer.print_attempts_table()

  identifier_getter = operator.attrgetter('issue_patch_string')
  attempts = sorted(analyzer.attempts, key=identifier_getter)
  extra_user_clicks = 0
  eventually_success_durations = []
  eventually_success_attempts = []

  # pylint: disable=W0612
  for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
    group = list(group_itr)
    if group[0].issue_id in analyzer.no_try_issues:
      continue
    results = map(operator.attrgetter('result'), group)
    # print '%s(%s): %s %s' % (identifier, len(group), total_time, results)
    if 'SUCCESS' in results:
      # 'RESTART' events may not require a user re-click?
      user_initiated = filter(lambda result: result != 'RESTART', results)
      extra_user_clicks += len(user_initiated) - 1
      durations = map(operator.attrgetter('duration'), group)
      eventually_success_durations.append(
          sum(durations, timedelta()).total_seconds())
      eventually_success_attempts.append(len(group))

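  # Each eventually-successful patch needed len(user_initiated) CQ starts;
  # every start beyond the first was a user re-click after a false-negative
  # rejection, which is what extra_user_clicks counts.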
  print ('%s CQ starts (%d%%) were due to a previous false negative '
         '(CQ landed a patch it previously rejected)') % (
             extra_user_clicks,
             100 * extra_user_clicks / len(analyzer.attempts))

  print 'Percentiles for patches which eventually land:'
  for i in [10, 25, 50, 75, 90, 95, 99]:
    eventually_success_durations_p = round(numpy.percentile(
        eventually_success_durations, i) / (60 * 60), 3)
    eventually_success_attempts_p = numpy.percentile(
        eventually_success_attempts, i)
    print '%d: %.3f hrs, %.3f attempts' % (
        i, eventually_success_durations_p, eventually_success_attempts_p)


if __name__ == '__main__':
  sys.exit(main())
