Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(132)

Side by Side Diff: recipe_engine/fetch.py

Issue 2833723003: [autoroller] All commits in updates(), only roll interesting ones. (Closed)
Patch Set: Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 import calendar 5 import calendar
6 import httplib 6 import httplib
7 import json 7 import json
8 import logging 8 import logging
9 import os 9 import os
10 import re 10 import re
(...skipping 13 matching lines...) Expand all
24 from . import env 24 from . import env
25 from . import requests_ssl 25 from . import requests_ssl
26 from .requests_ssl import requests 26 from .requests_ssl import requests
27 27
28 import subprocess42 28 import subprocess42
29 from google.protobuf import json_format 29 from google.protobuf import json_format
30 30
31 LOGGER = logging.getLogger(__name__) 31 LOGGER = logging.getLogger(__name__)
32 32
33 33
34 def has_interesting_changes(spec, changed_files):
35 # TODO(iannucci): analyze bundle_extra_paths.txt too.
36 return (
37 'infra/config/recipes.cfg' in changed_files or
38 any(f.startswith(spec.recipes_path) for f in changed_files)
39 )
40
41
34 class FetchError(Exception): 42 class FetchError(Exception):
35 pass 43 pass
36 44
37 45
38 class FetchNotAllowedError(FetchError): 46 class FetchNotAllowedError(FetchError):
39 pass 47 pass
40 48
41 49
42 class UnresolvedRefspec(Exception): 50 class UnresolvedRefspec(Exception):
43 pass 51 pass
44 52
45 53
46 # revision (str): the revision of this commit (i.e. hash) 54 # revision (str): the revision of this commit (i.e. hash)
47 # author_email (str|None): the email of the author of this commit 55 # author_email (str|None): the email of the author of this commit
48 # commit_timestamp (int): the unix commit timestamp for this commit 56 # commit_timestamp (int): the unix commit timestamp for this commit
49 # message_lines (tuple(str)): the message of this commit 57 # message_lines (tuple(str)): the message of this commit
50 # spec (package_pb2.Package): the parsed infra/config/recipes.cfg file or None. 58 # spec (package_pb2.Package): the parsed infra/config/recipes.cfg file or None.
59 # roll_candidate (bool): if this commit contains changes which are known to
60 # affect the behavior of the recipes (i.e. modifications within recipe_path
61 # and/or modifications to recipes.cfg)
51 CommitMetadata = namedtuple( 62 CommitMetadata = namedtuple(
52 '_CommitMetadata', 63 '_CommitMetadata',
53 'revision author_email commit_timestamp message_lines spec') 64 'revision author_email commit_timestamp message_lines spec roll_candidate')
54 65
55 66
56 class Backend(object): 67 class Backend(object):
57 @staticmethod 68 @staticmethod
58 def class_for_type(repo_type): 69 def class_for_type(repo_type):
59 """ 70 """
60 Args: 71 Args:
61 repo_type (package_pb2.DepSpec.RepoType) 72 repo_type (package_pb2.DepSpec.RepoType)
62 73
63 Returns Backend (class): Returns the Backend appropriate for the 74 Returns Backend (class): Returns the Backend appropriate for the
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
107 _GIT_METADATA_CACHE = {} 118 _GIT_METADATA_CACHE = {}
108 119
109 # This matches git commit hashes. 120 # This matches git commit hashes.
110 _COMMIT_RE = re.compile(r'^[a-fA-F0-9]{40}$') 121 _COMMIT_RE = re.compile(r'^[a-fA-F0-9]{40}$')
111 122
112 def commit_metadata(self, refspec): 123 def commit_metadata(self, refspec):
113 """Cached version of _commit_metadata_impl. 124 """Cached version of _commit_metadata_impl.
114 125
115 The refspec will be resolved if it's not absolute. 126 The refspec will be resolved if it's not absolute.
116 127
117 Returns { 128 Returns (CommitMetadata).
118 'author': '<author name>',
119 'message': '<commit message>',
120 'spec': package_pb2.Package or None, # the parsed recipes.cfg file.
121 }
122 """ 129 """
123 revision = self.resolve_refspec(refspec) 130 revision = self.resolve_refspec(refspec)
124 cache = self._GIT_METADATA_CACHE.setdefault(self.repo_url, {}) 131 cache = self._GIT_METADATA_CACHE.setdefault(self.repo_url, {})
125 if revision not in cache: 132 if revision not in cache:
126 cache[revision] = self._commit_metadata_impl(revision) 133 cache[revision] = self._commit_metadata_impl(revision)
127 return cache[revision] 134 return cache[revision]
128 135
129 @classmethod 136 @classmethod
130 def is_resolved_revision(cls, revision): 137 def is_resolved_revision(cls, revision):
131 return cls._COMMIT_RE.match(revision) 138 return cls._COMMIT_RE.match(revision)
132 139
133 @classmethod 140 @classmethod
134 def assert_resolved(cls, revision): 141 def assert_resolved(cls, revision):
135 if not cls.is_resolved_revision(revision): 142 if not cls.is_resolved_revision(revision):
136 raise UnresolvedRefspec('unresolved refspec %r' % revision) 143 raise UnresolvedRefspec('unresolved refspec %r' % revision)
137 144
138 def resolve_refspec(self, refspec): 145 def resolve_refspec(self, refspec):
139 if self.is_resolved_revision(refspec): 146 if self.is_resolved_revision(refspec):
140 return refspec 147 return refspec
141 return self._resolve_refspec_impl(refspec) 148 return self._resolve_refspec_impl(refspec)
142 149
143 def updates(self, revision, other_revision, paths): 150 def updates(self, revision, other_revision):
144 """Returns a list of revisions |revision| through |other_revision| 151 """Returns a list of revisions |revision| through |other_revision|
145 (inclusive). 152 (inclusive).
146 153
147 If |paths| is a non-empty list, the history is scoped just to these paths.
148
149 Returns list(CommitMetadata) - The commit metadata in the range 154 Returns list(CommitMetadata) - The commit metadata in the range
150 (revision,other_revision]. 155 (revision,other_revision].
151 """ 156 """
152 self.assert_resolved(revision) 157 self.assert_resolved(revision)
153 self.assert_resolved(other_revision) 158 self.assert_resolved(other_revision)
154 return self._updates_impl(revision, other_revision, paths) 159 return self._updates_impl(revision, other_revision)
155 160
156 ### direct overrides. These are public methods which must be overridden. 161 ### direct overrides. These are public methods which must be overridden.
157 162
158 @property 163 @property
159 def repo_type(self): 164 def repo_type(self):
160 """Returns package_pb2.DepSpec.RepoType.""" 165 """Returns package_pb2.DepSpec.RepoType."""
161 raise NotImplementedError() 166 raise NotImplementedError()
162 167
163 def fetch(self, refspec): 168 def fetch(self, refspec):
164 """Does a fetch for the provided refspec (e.g. get all data from remote), if 169 """Does a fetch for the provided refspec (e.g. get all data from remote), if
(...skipping 13 matching lines...) Expand all
178 remote git repo, e.g. 'refs/heads/master', 'deadbeef...face', etc. 183 remote git repo, e.g. 'refs/heads/master', 'deadbeef...face', etc.
179 """ 184 """
180 # TODO(iannucci): Alter the contract for this method so that it only checks 185 # TODO(iannucci): Alter the contract for this method so that it only checks
181 # out the files referred to according to the rules that the bundle 186 # out the files referred to according to the rules that the bundle
182 # subcommand uses. 187 # subcommand uses.
183 raise NotImplementedError() 188 raise NotImplementedError()
184 189
185 ### private overrides. Override these in the implementations, but don't call 190 ### private overrides. Override these in the implementations, but don't call
186 ### externally. 191 ### externally.
187 192
188 def _updates_impl(self, revision, other_revision, paths): 193 def _updates_impl(self, revision, other_revision):
189 """Returns a list of revisions |revision| through |other_revision|. This 194 """Returns a list of revisions |revision| through |other_revision|. This
190 includes |revision| and |other_revision|. 195 includes |revision| and |other_revision|.
191 196
192 If |paths| is a non-empty list, the history is scoped just to these paths.
193
194 Args: 197 Args:
195 revision (str) - the first git commit 198 revision (str) - the first git commit
196 other_revision (str) - the second git commit 199 other_revision (str) - the second git commit
197 200
198 Returns list(CommitMetadata) - The commit metadata in the range 201 Returns list(CommitMetadata) - The commit metadata in the range
199 [revision,other_revision]. 202 [revision,other_revision].
200 """ 203 """
201 raise NotImplementedError() 204 raise NotImplementedError()
202 205
203 def _resolve_refspec_impl(self, refspec): 206 def _resolve_refspec_impl(self, refspec):
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
320 self.fetch(refspec) 323 self.fetch(refspec)
321 324
322 # reset touches index.lock which is problematic when multiple processes are 325 # reset touches index.lock which is problematic when multiple processes are
323 # accessing the recipes at the same time. To allieviate this, we do a quick 326 # accessing the recipes at the same time. To allieviate this, we do a quick
324 # diff, which will exit if `revision` is not already checked out. 327 # diff, which will exit if `revision` is not already checked out.
325 try: 328 try:
326 self._git('diff', '--quiet', revision) 329 self._git('diff', '--quiet', revision)
327 except GitError: 330 except GitError:
328 self._git('reset', '-q', '--hard', revision) 331 self._git('reset', '-q', '--hard', revision)
329 332
330 def _updates_impl(self, revision, other_revision, paths): 333 def _updates_impl(self, revision, other_revision):
331 args = [ 334 args = [
332 'rev-list', 335 'rev-list',
333 '--reverse', 336 '--reverse',
334 '--topo-order', 337 '--topo-order',
335 '%s..%s' % (revision, other_revision), 338 '%s..%s' % (revision, other_revision),
336 ] 339 ]
337 if paths:
338 args.extend(['--'] + paths)
339 return [ 340 return [
340 self.commit_metadata(rev) 341 self.commit_metadata(rev)
341 for rev in self._git(*args).strip().split('\n') 342 for rev in self._git(*args).strip().split('\n')
342 if bool(rev) 343 if bool(rev)
343 ] 344 ]
344 345
345 def _resolve_refspec_impl(self, revision): 346 def _resolve_refspec_impl(self, revision):
346 self._ensure_local_repo_exists() 347 self._ensure_local_repo_exists()
347 self.assert_remote('resolve refspec %r' % revision) 348 self.assert_remote('resolve refspec %r' % revision)
348 rslt = self._git('ls-remote', self.repo_url, revision).split()[0] 349 rslt = self._git('ls-remote', self.repo_url, revision).split()[0]
(...skipping 11 matching lines...) Expand all
360 # %`Body` 361 # %`Body`
361 meta = self._git( 362 meta = self._git(
362 'show', '-s', '--format=%aE%n%ct%n%B', revision).rstrip('\n').splitlines() 363 'show', '-s', '--format=%aE%n%ct%n%B', revision).rstrip('\n').splitlines()
363 364
364 try: 365 try:
365 spec = package_io.parse(self._git( 366 spec = package_io.parse(self._git(
366 'cat-file', 'blob', '%s:infra/config/recipes.cfg' % revision)) 367 'cat-file', 'blob', '%s:infra/config/recipes.cfg' % revision))
367 except GitError: 368 except GitError:
368 spec = None 369 spec = None
369 370
371 # check diff to see if it touches anything interesting.
372 changed_files = set(self._git(
373 'diff-tree', '-r', '--no-commit-id', '--name-only', '%s^!' % revision)
374 .splitlines())
375
370 return CommitMetadata(revision, meta[0], 376 return CommitMetadata(revision, meta[0],
371 int(meta[1]), tuple(meta[2:]), 377 int(meta[1]), tuple(meta[2:]),
372 spec) 378 spec, has_interesting_changes(spec, changed_files))
373 379
374 class GitilesFetchError(FetchError): 380 class GitilesFetchError(FetchError):
375 """An HTTP error that occurred during Gitiles fetching.""" 381 """An HTTP error that occurred during Gitiles fetching."""
376 382
377 def __init__(self, status, message): 383 def __init__(self, status, message):
378 super(GitilesFetchError, self).__init__( 384 super(GitilesFetchError, self).__init__(
379 'Gitiles error code (%d): %s' % (status, message)) 385 'Gitiles error code (%d): %s' % (status, message))
380 self.status = status 386 self.status = status
381 self.message = message 387 self.message = message
382 388
383 @staticmethod 389 @staticmethod
384 def transient(e): 390 def transient(e):
385 """ 391 """
386 Returns (bool): True if "e" is a GitilesFetchError with transient HTTP code. 392 Returns (bool): True if "e" is a GitilesFetchError with transient HTTP code.
387 """ 393 """
388 return (isinstance(e, GitilesFetchError) and 394 return (isinstance(e, GitilesFetchError) and
389 e.status >= httplib.INTERNAL_SERVER_ERROR) 395 e.status >= httplib.INTERNAL_SERVER_ERROR)
390 396
391 397
392 # Internal cache object for GitilesBackend. 398 # Internal cache object for GitilesBackend.
393 # commit (str) - the git commit hash 399 # commit (str) - the git commit hash
394 # author_email (str) - the author email for this commit 400 # author_email (str) - the author email for this commit
395 # message_lines (tuple) - the lines of the commit message 401 # message_lines (tuple) - the lines of the commit message
402 # changed_files (frozenset) - all paths touched by this commit
396 class _GitilesCommitJson(namedtuple( 403 class _GitilesCommitJson(namedtuple(
397 '_GitilesCommitJson', 404 '_GitilesCommitJson',
398 'commit author_email commit_timestamp message_lines')): 405 'commit author_email commit_timestamp message_lines changed_files')):
399 @classmethod 406 @classmethod
400 def from_raw_json(cls, raw): 407 def from_raw_json(cls, raw):
408 mod_files = set()
409 for entry in raw['tree_diff']:
410 mod_files.add(entry['old_path'])
dnj 2017/04/20 18:16:21 Not sure if all of these fields are guaranteed to
iannucci 2017/04/20 21:53:20 They are, if there are entries in tree_diff.
411 mod_files.add(entry['new_path'])
401 return cls( 412 return cls(
402 raw['commit'], 413 raw['commit'],
403 raw['author']['email'], 414 raw['author']['email'],
404 calendar.timegm(time.strptime(raw['committer']['time'])), 415 calendar.timegm(time.strptime(raw['committer']['time'])),
405 tuple(raw['message'].splitlines()), 416 tuple(raw['message'].splitlines()),
417 frozenset(mod_files),
406 ) 418 )
407 419
408 420
409 class GitilesBackend(Backend): 421 class GitilesBackend(Backend):
410 """GitilesBackend uses a repo served by Gitiles.""" 422 """GitilesBackend uses a repo served by Gitiles."""
411 423
412 # Prefix at the beginning of Gerrit/Gitiles JSON API responses. 424 # Prefix at the beginning of Gerrit/Gitiles JSON API responses.
413 _GERRIT_XSRF_HEADER = ')]}\'\n' 425 _GERRIT_XSRF_HEADER = ')]}\'\n'
414 426
415 @util.exponential_retry(condition=GitilesFetchError.transient) 427 @util.exponential_retry(condition=GitilesFetchError.transient)
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
459 471
460 def _fetch_commit_json(self, refspec): 472 def _fetch_commit_json(self, refspec):
461 """Returns _GitilesCommitJson for the refspec. 473 """Returns _GitilesCommitJson for the refspec.
462 474
463 If refspec is resolved then this value is cached. 475 If refspec is resolved then this value is cached.
464 """ 476 """
465 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {}) 477 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {})
466 if refspec in c: 478 if refspec in c:
467 return c[refspec] 479 return c[refspec]
468 480
469 raw = self._fetch_gitiles_json('+/%s?format=JSON', refspec) 481 raw = self._fetch_gitiles_json('+/%s?name-status=1&format=JSON', refspec)
dnj 2017/04/20 18:16:21 Can we document what "name-status=1" does? format=
iannucci 2017/04/20 21:53:20 Done.
470 ret = _GitilesCommitJson.from_raw_json(raw) 482 ret = _GitilesCommitJson.from_raw_json(raw)
471 if self.is_resolved_revision(refspec): 483 if self.is_resolved_revision(refspec):
472 c[refspec] = ret 484 c[refspec] = ret
473 485
474 return ret 486 return ret
475 487
476 488
477 ### Backend implementations 489 ### Backend implementations
478 490
479 491
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
518 530
519 # TODO(iannucci): This implementation may be slow if we need to retieve 531 # TODO(iannucci): This implementation may be slow if we need to retieve
520 # multiple files/archives from the remote server. Should possibly consider 532 # multiple files/archives from the remote server. Should possibly consider
521 # using a thread pool here. 533 # using a thread pool here.
522 534
523 archive_response = self._fetch_gitiles( 535 archive_response = self._fetch_gitiles(
524 '+archive/%s/%s.tar.gz', revision, recipes_path_rel) 536 '+archive/%s/%s.tar.gz', revision, recipes_path_rel)
525 with tarfile.open(fileobj=StringIO(archive_response.content)) as tf: 537 with tarfile.open(fileobj=StringIO(archive_response.content)) as tf:
526 tf.extractall(recipes_path) 538 tf.extractall(recipes_path)
527 539
528 def _updates_impl(self, revision, other_revision, paths): 540 def _updates_impl(self, revision, other_revision):
529 self.assert_remote('_updates_impl') 541 self.assert_remote('_updates_impl')
530 542
531 # TODO(iannucci): implement paging 543 # TODO(iannucci): implement paging
532 544
533 # To include info about touched paths (tree_diff), pass name-status=1 below. 545 # To include info about touched paths (tree_diff), pass name-status=1 below.
534 log_json = self._fetch_gitiles_json( 546 log_json = self._fetch_gitiles_json(
535 '+log/%s..%s?name-status=1&format=JSON', revision, other_revision) 547 '+log/%s..%s?name-status=1&format=JSON', revision, other_revision)
536 548
537 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {}) 549 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {})
538 550
539 results = [] 551 results = []
540 for entry in log_json['log']: 552 for entry in log_json['log']:
541 commit = entry['commit'] 553 commit = entry['commit']
542 c[commit] = _GitilesCommitJson.from_raw_json(entry) 554 c[commit] = _GitilesCommitJson.from_raw_json(entry)
543 555 results.append(commit)
544 matched = False
545 for path in paths:
546 for diff_entry in entry['tree_diff']:
547 if (diff_entry['old_path'].startswith(path) or
548 diff_entry['new_path'].startswith(path)):
549 matched = True
550 break
551 if matched:
552 break
553 if matched or not paths:
554 results.append(commit)
555 556
556 results.reverse() 557 results.reverse()
557 return map(self.commit_metadata, results) 558 return map(self.commit_metadata, results)
558 559
559 def _resolve_refspec_impl(self, refspec): 560 def _resolve_refspec_impl(self, refspec):
560 if self.is_resolved_revision(refspec): 561 if self.is_resolved_revision(refspec):
561 return self.commit_metadata(refspec).commit 562 return self.commit_metadata(refspec).commit
562 return self._fetch_commit_json(refspec).commit 563 return self._fetch_commit_json(refspec).commit
563 564
564 def _commit_metadata_impl(self, revision): 565 def _commit_metadata_impl(self, revision):
565 self.assert_remote('_commit_metadata_impl') 566 self.assert_remote('_commit_metadata_impl')
566 rev_json = self._fetch_commit_json(revision) 567 rev_json = self._fetch_commit_json(revision)
567 568
568 recipes_cfg_text = self._fetch_gitiles( 569 recipes_cfg_text = self._fetch_gitiles(
569 '+/%s/infra/config/recipes.cfg?format=TEXT', revision 570 '+/%s/infra/config/recipes.cfg?format=TEXT', revision
570 ).text.decode('base64') 571 ).text.decode('base64')
571 spec = json_format.Parse( 572 spec = json_format.Parse(
572 recipes_cfg_text, package_pb2.Package(), ignore_unknown_fields=True) 573 recipes_cfg_text, package_pb2.Package(), ignore_unknown_fields=True)
573 574
574 return CommitMetadata( 575 return CommitMetadata(
575 revision, 576 revision,
576 rev_json.author_email, 577 rev_json.author_email,
577 rev_json.commit_timestamp, 578 rev_json.commit_timestamp,
578 rev_json.message_lines, 579 rev_json.message_lines,
579 spec) 580 spec,
581 has_interesting_changes(spec, rev_json.changed_files))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698