OLD | NEW |
---|---|
1 # Copyright 2016 The LUCI Authors. All rights reserved. | 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
4 | 4 |
5 import calendar | 5 import calendar |
6 import httplib | 6 import httplib |
7 import json | 7 import json |
8 import logging | 8 import logging |
9 import os | 9 import os |
10 import re | 10 import re |
(...skipping 13 matching lines...) Expand all Loading... | |
24 from . import env | 24 from . import env |
25 from . import requests_ssl | 25 from . import requests_ssl |
26 from .requests_ssl import requests | 26 from .requests_ssl import requests |
27 | 27 |
28 import subprocess42 | 28 import subprocess42 |
29 from google.protobuf import json_format | 29 from google.protobuf import json_format |
30 | 30 |
31 LOGGER = logging.getLogger(__name__) | 31 LOGGER = logging.getLogger(__name__) |
32 | 32 |
33 | 33 |
34 def has_interesting_changes(spec, changed_files): | |
35 # TODO(iannucci): analyze bundle_extra_paths.txt too. | |
36 return ( | |
37 'infra/config/recipes.cfg' in changed_files or | |
38 any(f.startswith(spec.recipes_path) for f in changed_files) | |
39 ) | |
40 | |
41 | |
34 class FetchError(Exception): | 42 class FetchError(Exception): |
35 pass | 43 pass |
36 | 44 |
37 | 45 |
38 class FetchNotAllowedError(FetchError): | 46 class FetchNotAllowedError(FetchError): |
39 pass | 47 pass |
40 | 48 |
41 | 49 |
42 class UnresolvedRefspec(Exception): | 50 class UnresolvedRefspec(Exception): |
43 pass | 51 pass |
44 | 52 |
45 | 53 |
46 # revision (str): the revision of this commit (i.e. hash) | 54 # revision (str): the revision of this commit (i.e. hash) |
47 # author_email (str|None): the email of the author of this commit | 55 # author_email (str|None): the email of the author of this commit |
48 # commit_timestamp (int): the unix commit timestamp for this commit | 56 # commit_timestamp (int): the unix commit timestamp for this commit |
49 # message_lines (tuple(str)): the message of this commit | 57 # message_lines (tuple(str)): the message of this commit |
50 # spec (package_pb2.Package): the parsed infra/config/recipes.cfg file or None. | 58 # spec (package_pb2.Package): the parsed infra/config/recipes.cfg file or None. |
59 # roll_candidate (bool): if this commit contains changes which are known to | |
60 # affect the behavior of the recipes (i.e. modifications within recipe_path | |
61 # and/or modifications to recipes.cfg) | |
51 CommitMetadata = namedtuple( | 62 CommitMetadata = namedtuple( |
52 '_CommitMetadata', | 63 '_CommitMetadata', |
53 'revision author_email commit_timestamp message_lines spec') | 64 'revision author_email commit_timestamp message_lines spec roll_candidate') |
54 | 65 |
55 | 66 |
56 class Backend(object): | 67 class Backend(object): |
57 @staticmethod | 68 @staticmethod |
58 def class_for_type(repo_type): | 69 def class_for_type(repo_type): |
59 """ | 70 """ |
60 Args: | 71 Args: |
61 repo_type (package_pb2.DepSpec.RepoType) | 72 repo_type (package_pb2.DepSpec.RepoType) |
62 | 73 |
63 Returns Backend (class): Returns the Backend appropriate for the | 74 Returns Backend (class): Returns the Backend appropriate for the |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
107 _GIT_METADATA_CACHE = {} | 118 _GIT_METADATA_CACHE = {} |
108 | 119 |
109 # This matches git commit hashes. | 120 # This matches git commit hashes. |
110 _COMMIT_RE = re.compile(r'^[a-fA-F0-9]{40}$') | 121 _COMMIT_RE = re.compile(r'^[a-fA-F0-9]{40}$') |
111 | 122 |
112 def commit_metadata(self, refspec): | 123 def commit_metadata(self, refspec): |
113 """Cached version of _commit_metadata_impl. | 124 """Cached version of _commit_metadata_impl. |
114 | 125 |
115 The refspec will be resolved if it's not absolute. | 126 The refspec will be resolved if it's not absolute. |
116 | 127 |
117 Returns { | 128 Returns (CommitMetadata). |
118 'author': '<author name>', | |
119 'message': '<commit message>', | |
120 'spec': package_pb2.Package or None, # the parsed recipes.cfg file. | |
121 } | |
122 """ | 129 """ |
123 revision = self.resolve_refspec(refspec) | 130 revision = self.resolve_refspec(refspec) |
124 cache = self._GIT_METADATA_CACHE.setdefault(self.repo_url, {}) | 131 cache = self._GIT_METADATA_CACHE.setdefault(self.repo_url, {}) |
125 if revision not in cache: | 132 if revision not in cache: |
126 cache[revision] = self._commit_metadata_impl(revision) | 133 cache[revision] = self._commit_metadata_impl(revision) |
127 return cache[revision] | 134 return cache[revision] |
128 | 135 |
129 @classmethod | 136 @classmethod |
130 def is_resolved_revision(cls, revision): | 137 def is_resolved_revision(cls, revision): |
131 return cls._COMMIT_RE.match(revision) | 138 return cls._COMMIT_RE.match(revision) |
132 | 139 |
133 @classmethod | 140 @classmethod |
134 def assert_resolved(cls, revision): | 141 def assert_resolved(cls, revision): |
135 if not cls.is_resolved_revision(revision): | 142 if not cls.is_resolved_revision(revision): |
136 raise UnresolvedRefspec('unresolved refspec %r' % revision) | 143 raise UnresolvedRefspec('unresolved refspec %r' % revision) |
137 | 144 |
138 def resolve_refspec(self, refspec): | 145 def resolve_refspec(self, refspec): |
139 if self.is_resolved_revision(refspec): | 146 if self.is_resolved_revision(refspec): |
140 return refspec | 147 return refspec |
141 return self._resolve_refspec_impl(refspec) | 148 return self._resolve_refspec_impl(refspec) |
142 | 149 |
143 def updates(self, revision, other_revision, paths): | 150 def updates(self, revision, other_revision): |
144 """Returns a list of revisions |revision| through |other_revision| | 151 """Returns a list of revisions |revision| through |other_revision| |
145 (inclusive). | 152 (inclusive). |
146 | 153 |
147 If |paths| is a non-empty list, the history is scoped just to these paths. | |
148 | |
149 Returns list(CommitMetadata) - The commit metadata in the range | 154 Returns list(CommitMetadata) - The commit metadata in the range |
150 (revision,other_revision]. | 155 (revision,other_revision]. |
151 """ | 156 """ |
152 self.assert_resolved(revision) | 157 self.assert_resolved(revision) |
153 self.assert_resolved(other_revision) | 158 self.assert_resolved(other_revision) |
154 return self._updates_impl(revision, other_revision, paths) | 159 return self._updates_impl(revision, other_revision) |
155 | 160 |
156 ### direct overrides. These are public methods which must be overridden. | 161 ### direct overrides. These are public methods which must be overridden. |
157 | 162 |
158 @property | 163 @property |
159 def repo_type(self): | 164 def repo_type(self): |
160 """Returns package_pb2.DepSpec.RepoType.""" | 165 """Returns package_pb2.DepSpec.RepoType.""" |
161 raise NotImplementedError() | 166 raise NotImplementedError() |
162 | 167 |
163 def fetch(self, refspec): | 168 def fetch(self, refspec): |
164 """Does a fetch for the provided refspec (e.g. get all data from remote), if | 169 """Does a fetch for the provided refspec (e.g. get all data from remote), if |
(...skipping 13 matching lines...) Expand all Loading... | |
178 remote git repo, e.g. 'refs/heads/master', 'deadbeef...face', etc. | 183 remote git repo, e.g. 'refs/heads/master', 'deadbeef...face', etc. |
179 """ | 184 """ |
180 # TODO(iannucci): Alter the contract for this method so that it only checks | 185 # TODO(iannucci): Alter the contract for this method so that it only checks |
181 # out the files referred to according to the rules that the bundle | 186 # out the files referred to according to the rules that the bundle |
182 # subcommand uses. | 187 # subcommand uses. |
183 raise NotImplementedError() | 188 raise NotImplementedError() |
184 | 189 |
185 ### private overrides. Override these in the implementations, but don't call | 190 ### private overrides. Override these in the implementations, but don't call |
186 ### externally. | 191 ### externally. |
187 | 192 |
188 def _updates_impl(self, revision, other_revision, paths): | 193 def _updates_impl(self, revision, other_revision): |
189 """Returns a list of revisions |revision| through |other_revision|. This | 194 """Returns a list of revisions |revision| through |other_revision|. This |
190 includes |revision| and |other_revision|. | 195 includes |revision| and |other_revision|. |
191 | 196 |
192 If |paths| is a non-empty list, the history is scoped just to these paths. | |
193 | |
194 Args: | 197 Args: |
195 revision (str) - the first git commit | 198 revision (str) - the first git commit |
196 other_revision (str) - the second git commit | 199 other_revision (str) - the second git commit |
197 | 200 |
198 Returns list(CommitMetadata) - The commit metadata in the range | 201 Returns list(CommitMetadata) - The commit metadata in the range |
199 [revision,other_revision]. | 202 [revision,other_revision]. |
200 """ | 203 """ |
201 raise NotImplementedError() | 204 raise NotImplementedError() |
202 | 205 |
203 def _resolve_refspec_impl(self, refspec): | 206 def _resolve_refspec_impl(self, refspec): |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
320 self.fetch(refspec) | 323 self.fetch(refspec) |
321 | 324 |
322 # reset touches index.lock which is problematic when multiple processes are | 325 # reset touches index.lock which is problematic when multiple processes are |
323 # accessing the recipes at the same time. To allieviate this, we do a quick | 326 # accessing the recipes at the same time. To allieviate this, we do a quick |
324 # diff, which will exit if `revision` is not already checked out. | 327 # diff, which will exit if `revision` is not already checked out. |
325 try: | 328 try: |
326 self._git('diff', '--quiet', revision) | 329 self._git('diff', '--quiet', revision) |
327 except GitError: | 330 except GitError: |
328 self._git('reset', '-q', '--hard', revision) | 331 self._git('reset', '-q', '--hard', revision) |
329 | 332 |
330 def _updates_impl(self, revision, other_revision, paths): | 333 def _updates_impl(self, revision, other_revision): |
331 args = [ | 334 args = [ |
332 'rev-list', | 335 'rev-list', |
333 '--reverse', | 336 '--reverse', |
334 '--topo-order', | 337 '--topo-order', |
335 '%s..%s' % (revision, other_revision), | 338 '%s..%s' % (revision, other_revision), |
336 ] | 339 ] |
337 if paths: | |
338 args.extend(['--'] + paths) | |
339 return [ | 340 return [ |
340 self.commit_metadata(rev) | 341 self.commit_metadata(rev) |
341 for rev in self._git(*args).strip().split('\n') | 342 for rev in self._git(*args).strip().split('\n') |
342 if bool(rev) | 343 if bool(rev) |
343 ] | 344 ] |
344 | 345 |
345 def _resolve_refspec_impl(self, revision): | 346 def _resolve_refspec_impl(self, revision): |
346 self._ensure_local_repo_exists() | 347 self._ensure_local_repo_exists() |
347 self.assert_remote('resolve refspec %r' % revision) | 348 self.assert_remote('resolve refspec %r' % revision) |
348 rslt = self._git('ls-remote', self.repo_url, revision).split()[0] | 349 rslt = self._git('ls-remote', self.repo_url, revision).split()[0] |
(...skipping 11 matching lines...) Expand all Loading... | |
360 # %`Body` | 361 # %`Body` |
361 meta = self._git( | 362 meta = self._git( |
362 'show', '-s', '--format=%aE%n%ct%n%B', revision).rstrip('\n').splitlines() | 363 'show', '-s', '--format=%aE%n%ct%n%B', revision).rstrip('\n').splitlines() |
363 | 364 |
364 try: | 365 try: |
365 spec = package_io.parse(self._git( | 366 spec = package_io.parse(self._git( |
366 'cat-file', 'blob', '%s:infra/config/recipes.cfg' % revision)) | 367 'cat-file', 'blob', '%s:infra/config/recipes.cfg' % revision)) |
367 except GitError: | 368 except GitError: |
368 spec = None | 369 spec = None |
369 | 370 |
371 # check diff to see if it touches anything interesting. | |
372 changed_files = set(self._git( | |
373 'diff-tree', '-r', '--no-commit-id', '--name-only', '%s^!' % revision) | |
374 .splitlines()) | |
375 | |
370 return CommitMetadata(revision, meta[0], | 376 return CommitMetadata(revision, meta[0], |
371 int(meta[1]), tuple(meta[2:]), | 377 int(meta[1]), tuple(meta[2:]), |
372 spec) | 378 spec, has_interesting_changes(spec, changed_files)) |
373 | 379 |
374 class GitilesFetchError(FetchError): | 380 class GitilesFetchError(FetchError): |
375 """An HTTP error that occurred during Gitiles fetching.""" | 381 """An HTTP error that occurred during Gitiles fetching.""" |
376 | 382 |
377 def __init__(self, status, message): | 383 def __init__(self, status, message): |
378 super(GitilesFetchError, self).__init__( | 384 super(GitilesFetchError, self).__init__( |
379 'Gitiles error code (%d): %s' % (status, message)) | 385 'Gitiles error code (%d): %s' % (status, message)) |
380 self.status = status | 386 self.status = status |
381 self.message = message | 387 self.message = message |
382 | 388 |
383 @staticmethod | 389 @staticmethod |
384 def transient(e): | 390 def transient(e): |
385 """ | 391 """ |
386 Returns (bool): True if "e" is a GitilesFetchError with transient HTTP code. | 392 Returns (bool): True if "e" is a GitilesFetchError with transient HTTP code. |
387 """ | 393 """ |
388 return (isinstance(e, GitilesFetchError) and | 394 return (isinstance(e, GitilesFetchError) and |
389 e.status >= httplib.INTERNAL_SERVER_ERROR) | 395 e.status >= httplib.INTERNAL_SERVER_ERROR) |
390 | 396 |
391 | 397 |
392 # Internal cache object for GitilesBackend. | 398 # Internal cache object for GitilesBackend. |
393 # commit (str) - the git commit hash | 399 # commit (str) - the git commit hash |
394 # author_email (str) - the author email for this commit | 400 # author_email (str) - the author email for this commit |
395 # message_lines (tuple) - the lines of the commit message | 401 # message_lines (tuple) - the lines of the commit message |
402 # changed_files (frozenset) - all paths touched by this commit | |
396 class _GitilesCommitJson(namedtuple( | 403 class _GitilesCommitJson(namedtuple( |
397 '_GitilesCommitJson', | 404 '_GitilesCommitJson', |
398 'commit author_email commit_timestamp message_lines')): | 405 'commit author_email commit_timestamp message_lines changed_files')): |
399 @classmethod | 406 @classmethod |
400 def from_raw_json(cls, raw): | 407 def from_raw_json(cls, raw): |
408 mod_files = set() | |
409 for entry in raw['tree_diff']: | |
410 mod_files.add(entry['old_path']) | |
dnj
2017/04/20 18:16:21
Not sure if all of these fields are guaranteed to
iannucci
2017/04/20 21:53:20
They are, if there are entries in tree_diff.
| |
411 mod_files.add(entry['new_path']) | |
401 return cls( | 412 return cls( |
402 raw['commit'], | 413 raw['commit'], |
403 raw['author']['email'], | 414 raw['author']['email'], |
404 calendar.timegm(time.strptime(raw['committer']['time'])), | 415 calendar.timegm(time.strptime(raw['committer']['time'])), |
405 tuple(raw['message'].splitlines()), | 416 tuple(raw['message'].splitlines()), |
417 frozenset(mod_files), | |
406 ) | 418 ) |
407 | 419 |
408 | 420 |
409 class GitilesBackend(Backend): | 421 class GitilesBackend(Backend): |
410 """GitilesBackend uses a repo served by Gitiles.""" | 422 """GitilesBackend uses a repo served by Gitiles.""" |
411 | 423 |
412 # Prefix at the beginning of Gerrit/Gitiles JSON API responses. | 424 # Prefix at the beginning of Gerrit/Gitiles JSON API responses. |
413 _GERRIT_XSRF_HEADER = ')]}\'\n' | 425 _GERRIT_XSRF_HEADER = ')]}\'\n' |
414 | 426 |
415 @util.exponential_retry(condition=GitilesFetchError.transient) | 427 @util.exponential_retry(condition=GitilesFetchError.transient) |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
459 | 471 |
460 def _fetch_commit_json(self, refspec): | 472 def _fetch_commit_json(self, refspec): |
461 """Returns _GitilesCommitJson for the refspec. | 473 """Returns _GitilesCommitJson for the refspec. |
462 | 474 |
463 If refspec is resolved then this value is cached. | 475 If refspec is resolved then this value is cached. |
464 """ | 476 """ |
465 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {}) | 477 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {}) |
466 if refspec in c: | 478 if refspec in c: |
467 return c[refspec] | 479 return c[refspec] |
468 | 480 |
469 raw = self._fetch_gitiles_json('+/%s?format=JSON', refspec) | 481 raw = self._fetch_gitiles_json('+/%s?name-status=1&format=JSON', refspec) |
dnj
2017/04/20 18:16:21
Can we document what "name-status=1" does? format=
iannucci
2017/04/20 21:53:20
Done.
| |
470 ret = _GitilesCommitJson.from_raw_json(raw) | 482 ret = _GitilesCommitJson.from_raw_json(raw) |
471 if self.is_resolved_revision(refspec): | 483 if self.is_resolved_revision(refspec): |
472 c[refspec] = ret | 484 c[refspec] = ret |
473 | 485 |
474 return ret | 486 return ret |
475 | 487 |
476 | 488 |
477 ### Backend implementations | 489 ### Backend implementations |
478 | 490 |
479 | 491 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
518 | 530 |
519 # TODO(iannucci): This implementation may be slow if we need to retieve | 531 # TODO(iannucci): This implementation may be slow if we need to retieve |
520 # multiple files/archives from the remote server. Should possibly consider | 532 # multiple files/archives from the remote server. Should possibly consider |
521 # using a thread pool here. | 533 # using a thread pool here. |
522 | 534 |
523 archive_response = self._fetch_gitiles( | 535 archive_response = self._fetch_gitiles( |
524 '+archive/%s/%s.tar.gz', revision, recipes_path_rel) | 536 '+archive/%s/%s.tar.gz', revision, recipes_path_rel) |
525 with tarfile.open(fileobj=StringIO(archive_response.content)) as tf: | 537 with tarfile.open(fileobj=StringIO(archive_response.content)) as tf: |
526 tf.extractall(recipes_path) | 538 tf.extractall(recipes_path) |
527 | 539 |
528 def _updates_impl(self, revision, other_revision, paths): | 540 def _updates_impl(self, revision, other_revision): |
529 self.assert_remote('_updates_impl') | 541 self.assert_remote('_updates_impl') |
530 | 542 |
531 # TODO(iannucci): implement paging | 543 # TODO(iannucci): implement paging |
532 | 544 |
533 # To include info about touched paths (tree_diff), pass name-status=1 below. | 545 # To include info about touched paths (tree_diff), pass name-status=1 below. |
534 log_json = self._fetch_gitiles_json( | 546 log_json = self._fetch_gitiles_json( |
535 '+log/%s..%s?name-status=1&format=JSON', revision, other_revision) | 547 '+log/%s..%s?name-status=1&format=JSON', revision, other_revision) |
536 | 548 |
537 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {}) | 549 c = self._COMMIT_JSON_CACHE.setdefault(self.repo_url, {}) |
538 | 550 |
539 results = [] | 551 results = [] |
540 for entry in log_json['log']: | 552 for entry in log_json['log']: |
541 commit = entry['commit'] | 553 commit = entry['commit'] |
542 c[commit] = _GitilesCommitJson.from_raw_json(entry) | 554 c[commit] = _GitilesCommitJson.from_raw_json(entry) |
543 | 555 results.append(commit) |
544 matched = False | |
545 for path in paths: | |
546 for diff_entry in entry['tree_diff']: | |
547 if (diff_entry['old_path'].startswith(path) or | |
548 diff_entry['new_path'].startswith(path)): | |
549 matched = True | |
550 break | |
551 if matched: | |
552 break | |
553 if matched or not paths: | |
554 results.append(commit) | |
555 | 556 |
556 results.reverse() | 557 results.reverse() |
557 return map(self.commit_metadata, results) | 558 return map(self.commit_metadata, results) |
558 | 559 |
559 def _resolve_refspec_impl(self, refspec): | 560 def _resolve_refspec_impl(self, refspec): |
560 if self.is_resolved_revision(refspec): | 561 if self.is_resolved_revision(refspec): |
561 return self.commit_metadata(refspec).commit | 562 return self.commit_metadata(refspec).commit |
562 return self._fetch_commit_json(refspec).commit | 563 return self._fetch_commit_json(refspec).commit |
563 | 564 |
564 def _commit_metadata_impl(self, revision): | 565 def _commit_metadata_impl(self, revision): |
565 self.assert_remote('_commit_metadata_impl') | 566 self.assert_remote('_commit_metadata_impl') |
566 rev_json = self._fetch_commit_json(revision) | 567 rev_json = self._fetch_commit_json(revision) |
567 | 568 |
568 recipes_cfg_text = self._fetch_gitiles( | 569 recipes_cfg_text = self._fetch_gitiles( |
569 '+/%s/infra/config/recipes.cfg?format=TEXT', revision | 570 '+/%s/infra/config/recipes.cfg?format=TEXT', revision |
570 ).text.decode('base64') | 571 ).text.decode('base64') |
571 spec = json_format.Parse( | 572 spec = json_format.Parse( |
572 recipes_cfg_text, package_pb2.Package(), ignore_unknown_fields=True) | 573 recipes_cfg_text, package_pb2.Package(), ignore_unknown_fields=True) |
573 | 574 |
574 return CommitMetadata( | 575 return CommitMetadata( |
575 revision, | 576 revision, |
576 rev_json.author_email, | 577 rev_json.author_email, |
577 rev_json.commit_timestamp, | 578 rev_json.commit_timestamp, |
578 rev_json.message_lines, | 579 rev_json.message_lines, |
579 spec) | 580 spec, |
581 has_interesting_changes(spec, rev_json.changed_files)) | |
OLD | NEW |