#!/usr/bin/python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Generate stats of CQ usage.'''

# sudo apt-get install python-pip; sudo pip install requests requests-cache
import argparse
import collections
from datetime import timedelta, datetime
import itertools
import operator
import re
import sys
import time

import numpy
import requests_cache


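# Progress below is printed a few characters at a time; wrapping stdout so it
# flushes on every write makes output appear immediately, even when piped.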
class Unbuffered(object):
  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    self.stream.write(data)
    self.stream.flush()

  def __getattr__(self, attr):
    return getattr(self.stream, attr)

sys.stdout = Unbuffered(sys.stdout)


CONFIG = {
  'chrome': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/chrome/trunk/src'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/chrome/trunk/src'),
    ],
  },
  'blink': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/blink/(?:trunk/?)?$'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/blink/(?:trunk/?)?$'),
    ],
  },
  'skia': {
    'target_base_url_regexps': [
      # Escape the dots so they don't match arbitrary characters.
      re.compile(r'^http://skia\.googlecode\.com/svn/trunk'),
      re.compile(r'^https://skia\.googlecode\.com/svn/trunk'),
    ],
  },
}
config_name = None


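# requests_cache fires response hooks for cache hits as well as live fetches;
# sleeping only on cache misses throttles real traffic to rietveld while
# cached replays stay fast.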
def make_throttle_hook(timeout=1.0):
  # pylint: disable=W0613
  def hook(response, *args, **kwargs):
    print response.url
    if not getattr(response, 'from_cache', False):
      print 'sleeping'
      time.sleep(timeout)
    return response
  return hook


# It seems reasonable to expect that rietveld can handle a load of 20 queries
# per second from this script. Ask someone on the infra team before reducing
# this value.
seconds_between_requests = 0.05

requests_cache.install_cache('cq')
session = requests_cache.CachedSession()
session.hooks = {'response': make_throttle_hook(seconds_between_requests)}


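# Rietveld's /search endpoint pages its JSON results; each response includes
# a cursor to pass back in to fetch the next page.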
def fetch_recent_issues(limit, args, cursor=None):
  search_url = ('https://codereview.chromium.org/search?format=json&'
                'limit=%d') % limit
  if cursor:
    search_url += '&cursor=%s' % cursor
  if args.modified_after:
    search_url += '&modified_after=%s' % args.modified_after
  if args.modified_before:
    search_url += '&modified_before=%s' % args.modified_before
  response = session.get(search_url)
  return response.json()


def get_target_base_url_regexps():
  return CONFIG[config_name]['target_base_url_regexps']


def matches_target_base_url(url):
  return any(regexp.match(url)
             for regexp in get_target_base_url_regexps())


def issue_patch_string(issue_id, patch_id):
  return '%s:%s' % (issue_id, patch_id)


def issue_display_url(issue_id):
  return 'https://codereview.chromium.org/%s' % issue_id


def issue_url(issue_id):
  return 'https://codereview.chromium.org/api/%s?messages=true' % issue_id


def date_from_iso_str(iso_str):
  # Issue timestamps come in one of two formats:
  #   2013-10-17 16:43:04.391480
  #   2013-10-17 16:43:04
  try:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S.%f')
  except ValueError:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S')


def round_timedelta(delta):
  # We never care about the milliseconds when printing timedeltas:
  return timedelta(seconds=round(delta.total_seconds()))


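# An issue's "now" is when its response was fetched, which (thanks to the
# cache) may be long ago. requests_cache doesn't expose that timestamp
# directly, so we dig it out of the private cache API.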
def fetch_issue(issue_id):
  url = issue_url(issue_id)
  this_json = session.get(url).json()
  # pylint: disable=W0212
  cache_key = session.cache._url_to_key(url)
  _, now = session.cache.get_response_and_time(cache_key)
  return this_json, now


def format_line(values, aligns, widths):
  return ' '.join(align(str(value), width)
                  for value, align, width in zip(values, aligns, widths))


class CQAttempt(object):
  '''One commit-queue attempt on a single patchset.'''

  def __init__(self):
    self.result = None
    self.patch_id = None
    self.issue_id = None
    self.author = None
    self.start_time = None
    self.end_time = None

  @property
  def issue_patch_string(self):
    return issue_patch_string(self.issue_id, self.patch_id)

  @property
  def duration(self):
    return self.end_time - self.start_time


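# CQAnalyzer replays each issue's message stream as a small state machine:
# commit-bot's "trying da patch" message opens an attempt, and the first
# recognized terminal message closes it with a result label.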
class CQAnalyzer(object):
  def __init__(self):
    self.state = None
    self.issue = None
    self.issue_id = None
    self.author = None
    self.patch_id = None
    self.start_time = None
    self.attempts = []
    self.manual_commits = set()
    self.no_try_issues = set()
    self.now = None
    # issue_id -> time since CQ start
    self.in_progress = {}

  # pylint: disable=C0301
  start_regexp = re.compile(r'''CQ is trying da patch. Follow status at
https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)''')
  committed_regexp = re.compile(r'Change committed as (?P<revision>\d+)')
  # pylint: disable=C0301
  manual_commit_regexp = re.compile(r'Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)')

  def _reset(self):
    self.state = None
    self.author = None
    self.patch_id = None
    self.start_time = None

  def _start(self, patch_id, start_time, author=None):
    if self.state == 'START' and self.patch_id == patch_id:
      # print 'Restarted for patchset %s on %s' % (
      #     patch_id, issue_display_url(self.issue_id))
      self._end('RESTART', start_time)
    self.state = 'START'
    self.patch_id = patch_id
    self.author = author
    assert start_time
    self.start_time = start_time

  def _end(self, result, end_time):
    if self.state != 'START':
      print 'ERROR: %s for %s without start??' % (
          result, issue_display_url(self.issue_id))
      self._reset()
      return
    attempt = CQAttempt()
    attempt.result = result
    attempt.patch_id = self.patch_id
    attempt.issue_id = self.issue_id
    attempt.start_time = self.start_time
    attempt.end_time = end_time
    self.attempts.append(attempt)
    self._reset()

  def _start_and_end(self, result, date):
    # These attempts never ran, so there is no patchset to attribute them
    # to; the result label stands in for the patch id.
    self._start(result, date)
    self._end(result, date)

  def _manual_commit(self, patch_id):
    self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))

  def process_message(self, message):
    text = message['text']
    date = date_from_iso_str(message['date'])

    match = self.manual_commit_regexp.match(text)
    if match:
      if self.start_time:
        self._end('MANUAL_COMMIT',
                  date_from_iso_str(self.issue['modified']))
      patch_number = int(match.group('patch_id'))
      # gcl dcommit talks in terms of patch numbers 1-5; we need to map
      # those to patchset ids:
      if len(self.issue['patchsets']) > (patch_number - 1):
        patch_id = self.issue['patchsets'][patch_number - 1]
      else:
        print '%s missing patch number %d' % (
            self.issue_id, patch_number)
        print self.issue['patchsets']
        # Debugging hint: paste this into a REPL to inspect the issue.
        print 'issue, now = fetch_issue(\'%s\')' % self.issue_id
        patch_id = self.issue['patchsets'][-1]
      self._manual_commit(patch_id)
      return

    if message['sender'] != 'commit-bot@chromium.org':
      return

    match = self.start_regexp.match(text)
    if match:
      self._start(match.group('patch_id'), date, match.group('author'))
      return
    match = self.committed_regexp.match(text)
    if match:
      self._end('SUCCESS', date)
      return

    # The prefixes below are matched verbatim against commit-bot output,
    # quirky grammar included.
    cq_end_messages = {
        'TRY': 'Retried try job',
        'APPLY': 'Failed to apply patch',
        'APPLY2': 'Failed to apply the patch',
        'COMPILE': 'Sorry for I got bad news for ya',
        'DESCRIPTION_CHANGED': ('Commit queue rejected this change because '
                                'the description'),
        # This is too conservative.
        'REVIEWERS_CHANGED': 'List of reviewers changed.',
        # FAILED_TO_TRIGGER is a very serious failure; it's unclear why it
        # happens!
        'FAILED_TO_TRIGGER': 'Failed to trigger a try job on',
        # User caused.
        'PATCH_CHANGED': 'Commit queue failed due to new patchset.',
        # BINARY_FILE is just a bug in the CQ.
        'BINARY_FILE': 'Can\'t process patch for file',
        'BINARY_FILE2': 'Failed to request the patch to try',
        # Unclear why UPDATE_STEP happens. Likely broken bots, which
        # shouldn't fail patches!
        'UPDATE_STEP': 'Step "update" is always a major failure.',
        'BERSERK': 'The commit queue went berserk',
        'INTERNAL_ERROR': 'Commit queue had an internal error.',
    }
    for result, message_text in cq_end_messages.items():
      if text.startswith(message_text):
        self._end(result, date)
        return

    cq_fail_to_start_messages = {
        'FAILED_PATCHSET': 'Failed to get patchset properties',
        'NO_COMMENTS': 'No comments yet.',
        'NO_LGTM': 'No LGTM from a valid reviewer yet.',
    }
    for result, message_text in cq_fail_to_start_messages.items():
      if text.startswith(message_text):
        self._start_and_end(result, date)
        return

    print 'ERROR: %s has unknown message: %s' % (
        issue_display_url(self.issue_id), text)
    self._end('UNKNOWN', date)

  def start_issue(self, issue, now):
    # We may be dealing with cached data, so cache our concept of "now".
    self.now = now
    self.issue = issue
    self.issue_id = issue['issue']

  def end_issue(self, issue):
    issue_id = issue['issue']
    assert issue_id == self.issue_id
    if self.start_time:
      if self.issue['commit']:
        print '%s in progress? %s' % (
            round_timedelta(self.now - self.start_time),
            issue_display_url(self.issue_id))
        self._end('IN_PROGRESS', self.now)
      else:
        # There are various reasons why the commit box may have been
        # unchecked, including that the issue may be closed by now. If those
        # aren't from a user, that's a bug in rietveld and out of scope for
        # this script. For now we just ignore cases where the box was
        # unchecked. We could log an _end() event, but which time would we
        # use? last_modified has at best a weak correlation with the issue
        # close or the CQ checkbox.
        self._reset()
    # _reset() shouldn't be needed, but just in case.
    self._reset()
    self.issue_id = None
    self.issue = None

  def process_issue(self, issue, now):
    issue_id = issue['issue']

    if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
      self.no_try_issues.add(issue_id)

    self.start_issue(issue, now)
    for message in issue['messages']:
      self.process_message(message)
      # print '%(date)s: %(text)s' % message
    self.end_issue(issue)

  def print_attempts_table(self):
    result_getter = operator.attrgetter('result')
    attempts = sorted(self.attempts, key=result_getter)

    col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
    col_widths = (5, 20, 16, 16)
    col_headers = ('count', 'result', 'mean duration', 'median duration')
    print
    print ('CQ Attempts by result (there can be more than '
           'one attempt on the same patch)')
    print format_line(col_headers, col_aligns, col_widths)
    print '-' * (sum(col_widths) + len(col_widths) - 1)
    for result, group_itr in itertools.groupby(attempts, result_getter):
      group = list(group_itr)
      durations = map(operator.attrgetter('duration'), group)
      mean = sum(durations, timedelta()) / len(group)
      durations_seconds = map(timedelta.total_seconds, durations)
      median_seconds = numpy.median(durations_seconds)
      median = timedelta(seconds=round(median_seconds))
      values = (len(group), result, round_timedelta(mean), median)
      print format_line(values, col_aligns, col_widths)
    print


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--base-url-stats', action='store_true',
                      help='print base_url matching stats')
  parser.add_argument('--chrome', action='store_true')
  parser.add_argument('--clear', action='store_true',
                      help='clear the cache before starting')
  parser.add_argument('--count', type=int, default=None,
                      help='number of issues to fetch')
  parser.add_argument('--modified-after', default=None,
                      help=('search for issues modified after '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--modified-before', default=None,
                      help=('search for issues modified before '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--progress', action='store_true',
                      help='show progress')
  parser.add_argument('--skia', action='store_true')
  args = parser.parse_args()

  # Rietveld allows up to 1000 results per search query, but it's unclear
  # whether larger pages are any easier on the server. Use 100 to be safe.
  per_request_search_limit = 100
  if args.count:
    per_request_search_limit = min(int(args.count / 2), 100)
  # A target_count of None means no limit: keep going until the search
  # runs dry.
  target_count = args.count
  if args.clear:
    print 'Clearing cache...'
    session.cache.clear()
  global config_name
  if args.chrome:
    config_name = 'chrome'
  elif args.skia:
    config_name = 'skia'
  else:
    config_name = 'blink'
  print 'Fetching and analyzing most recent issues...'
  analyzer = CQAnalyzer()

  total_fetch_count = 0
  issue_count = 0
  cursor = None
  url_counts = collections.Counter()
  while True:
    search_results = fetch_recent_issues(per_request_search_limit, args,
                                         cursor)
    if len(search_results['results']) == 0:
      break
    cursor = search_results['cursor']
    total_fetch_count += per_request_search_limit

    if args.base_url_stats:
      url_counts.update([result['base_url']
                         for result in search_results['results']])

    for result in search_results['results']:
      if not matches_target_base_url(result['base_url']):
        continue
      issue_count += 1
      if args.progress and (issue_count % 100 == 0):
        print issue_count,
        if target_count:
          print target_count
      issue, now = fetch_issue(result['issue'])
      analyzer.process_issue(issue, now)
      if target_count and issue_count >= target_count:
        break
    # The break above only exits the for loop; stop paging as well once the
    # target is reached.
    if target_count and issue_count >= target_count:
      break

  if args.base_url_stats:
    print
    for url, count in url_counts.most_common():
      matches_target = matches_target_base_url(url)
      print '%s %s %s' % (matches_target, count, url)

  print
  print 'Found %s issues (in the %s most recent issues).' % (
      issue_count, total_fetch_count)
  cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
  cq_issue_count = len(cq_issues)
  cq_patches = set([attempt.issue_patch_string
                    for attempt in analyzer.attempts])
  cq_patch_count = len(cq_patches)
  print ('%s issues (%d%%) (%d patches) tried the CQ, '
         'resulting in %d CQ attempts.') % (
      cq_issue_count, 100 * cq_issue_count / issue_count, cq_patch_count,
      len(analyzer.attempts))

  no_try_issues = len(cq_issues & analyzer.no_try_issues)
  gave_up_patches = len(cq_patches & analyzer.manual_commits)
  print ('%s of those (%d%%) were committed manually (gave up). '
         '%s (%d%%) were marked NOTRY.') % (
      gave_up_patches, 100 * gave_up_patches / cq_issue_count,
      no_try_issues, 100 * no_try_issues / cq_issue_count)

  analyzer.print_attempts_table()

  identifier_getter = operator.attrgetter('issue_patch_string')
  attempts = sorted(analyzer.attempts, key=identifier_getter)
  extra_user_clicks = 0
  eventually_success_durations = []
  eventually_success_attempts = []

  # pylint: disable=W0612
  for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
    group = list(group_itr)
    if group[0].issue_id in analyzer.no_try_issues:
      continue
    results = map(operator.attrgetter('result'), group)
    # print '%s(%s): %s %s' % (identifier, len(group), total_time, results)
    if 'SUCCESS' in results:
      # 'RESTART' events may not require a user re-click?
      user_initiated = filter(lambda result: result != 'RESTART', results)
      extra_user_clicks += len(user_initiated) - 1
      durations = map(operator.attrgetter('duration'), group)
      eventually_success_durations.append(
          sum(durations, timedelta()).total_seconds())
      eventually_success_attempts.append(len(group))

  print ('%s CQ starts (%d%%) were due to a previous false negative '
         '(the CQ landed a patch it previously rejected)') % (
      extra_user_clicks, 100 * extra_user_clicks / len(analyzer.attempts))

  print 'Percentiles for patches which eventually land:'
  for i in [10, 25, 50, 75, 90, 95, 99]:
    eventually_success_durations_p = round(numpy.percentile(
        eventually_success_durations, i) / (60 * 60), 3)
    eventually_success_attempts_p = numpy.percentile(
        eventually_success_attempts, i)
    print '%d: %.3f hrs, %.3f attempts' % (
        i, eventually_success_durations_p, eventually_success_attempts_p)


if __name__ == '__main__':
  sys.exit(main())