Index: git_drover.py |
diff --git a/git_drover.py b/git_drover.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..be5a3957ba6da2c6431d2d574441482881a030f4 |
--- /dev/null |
+++ b/git_drover.py |
@@ -0,0 +1,557 @@ |
#!/usr/bin/env python
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Merge/Revert changes to Chromium release branches.

This will use the git clone in the current directory if it matches the commit
you passed in. Alternately, run this script in an empty directory and it will
clone the appropriate repo for you (using `git cache` to do the smallest amount
of network IO possible).

This tool is aware of the following repos:
"""
+ |
import argparse
import collections
import multiprocessing
import os
import pprint
import re
import sys
import textwrap
import urllib2
import urlparse

from multiprocessing.pool import ThreadPool

import git_cache
import git_common as git

from third_party import fancy_urllib
+ |
# This tool can't run at all without certificate validation support.
assert fancy_urllib.can_validate_certs()

CA_CERTS_FILE = os.path.abspath(os.path.join(
    os.path.dirname(__file__), 'third_party', 'boto', 'cacerts',
    'cacerts.txt'))

urllib2.install_opener(urllib2.build_opener(
    fancy_urllib.FancyRedirectHandler(),
    fancy_urllib.FancyHTTPSHandler()))


# Sentinel for "not found", distinct from any real return value (including
# None).
MISSING = object()

OK_HOST_FMT = '%s.googlesource.com'
OK_REPOS = {
    'chrome-internal': ('chrome/src-internal',),
    'chromium': ('chromium/src', 'chromium/blink',
                 'native_client/src/native_client'),
}
+ |
def repo_url(host, repo):
  """Return the canonical https clone URL for a known (host, repo) pair."""
  assert host in OK_REPOS
  assert repo in OK_REPOS[host]
  netloc = OK_HOST_FMT % host
  return 'https://%s/%s.git' % (netloc, repo)
+ |
# The lambda avoids polluting the module namespace with loop variables, but
# still executes at import-time so --help shows the repo list.
__doc__ += (lambda: '\n'.join(
    ' * %s' % repo_url(host, repo)
    for host, repos in OK_REPOS.iteritems()
    for repo in repos))()
+ |
+ |
def die(msg, *args):
  """Print a dedented (optionally %-formatted) message to stderr and exit(1).

  Args:
    msg: message text; dedented via textwrap.dedent before printing.
    *args: if supplied, interpolated into msg with the % operator.
  """
  text = textwrap.dedent(msg)
  if args:
    text %= args
  sys.stderr.write(text + '\n')
  sys.exit(1)
+ |
+ |
def retry(fn, args=(), kwargs=None, on=(), but_not=(), upto=3):
  """Call fn(*args, **kwargs), retrying on the given exception types.

  Args:
    fn: callable to invoke.
    args: positional arguments for fn.
    kwargs: keyword arguments for fn (None means {}).
    on: exception class (or tuple of classes) that triggers a retry.
    but_not: exception class (or tuple) re-raised immediately, even if it
        would also match |on|.
    upto: maximum number of attempts; the exception from the final attempt
        propagates.

  Returns:
    fn's return value from the first successful attempt.
  """
  kwargs = kwargs or {}
  # Always make at least one attempt; previously upto < 1 silently returned
  # None without ever calling fn.
  attempts = max(upto, 1)
  for attempt in range(attempts):
    try:
      return fn(*args, **kwargs)
    except but_not:
      raise
    except on:
      if attempt + 1 == attempts:
        raise
+ |
+ |
################################################################################
+ |
+ |
def announce(msg=None, msg_fn=lambda: None):
  """Print a banner-delimited ('=' * 80) announcement.

  Args:
    msg: optional message text; dedented before printing (review-accepted
        fix: previously printed verbatim).
    msg_fn: optional callable that prints extra lines inside the banner.
  """
  print('=' * 80)
  if msg:
    print(textwrap.dedent(msg))
  msg_fn()
  print('=' * 80)
+ |
+ |
def confirm(prompt='Is this correct?', abort='No changes have been made.'):
  """Interactively ask the user to confirm; die() on an explicit 'no'.

  Loops until the user either accepts (enter / y / Y) or declines (n / N);
  any other input re-prompts.
  """
  while True:
    answer = raw_input('%s (Y/n) ' % prompt)
    if answer == '' or answer in 'Yy':
      return
    if answer in 'Nn':
      die('Aborting. %s' % abort)
+ |
+ |
def summarize_job(correct_url, commits, target_ref, action):
  """Announce a human-readable summary of the planned merge/revert job."""
  def _print_summary():
    preposition = 'to' if action == 'merge' else 'from'
    print("Planning to %s %d change%s %s branch %s of %s." % (
        action, len(commits), 's' if len(commits) > 1 else '',
        preposition, target_ref.num, correct_url))
    for commit in commits:
      print(git.run('show', '-s', '--format=%H\t%s', commit))
  announce(msg_fn=_print_summary)
+ |
+ |
def ensure_working_directory(commits, target_ref):
  """Ensure the cwd is a usable checkout of the repo containing |commits|.

  Either validates the current git repo (clean working tree, recognized
  'origin' URL matching the commits), or bootstraps a fresh clone into an
  empty directory via git_cache.

  Args:
    commits: list of commit hashes that must exist in the repo.
    target_ref: a NumberedBranch; its branch-heads ref is fetched too.

  Returns:
    The canonical URL of the repo the commits belong to. Dies on any
    mismatch or unusable directory.
  """
  # TODO(iannucci): check all hashes locally after fetching first

  fetch_specs = [
      '%s:%s' % (target_ref.remote_full_ref, target_ref.remote_full_ref)
  ] + commits

  if git.check('rev-parse', '--is-inside-work-tree'):
    actual_url = git.get_remote_url('origin')

    if not actual_url or not is_ok_repo(actual_url):
      die("""\
      Inside a git repo, but origin's remote URL doesn't match one of the
      supported git repos.
      Current URL: %s""", actual_url)

    status = git.run('status', '--porcelain')
    if status:
      die("""\
      Your current directory is usable for the command you specified, but it
      appears to be dirty (i.e. there are uncommitted changes). Please commit,
      freeze, or stash these changes and run this command again.

      %s""", '\n'.join(' ' + line for line in status.splitlines()))

    correct_url = get_correct_url(commits, actual_url)
    if correct_url != actual_url:
      die("""\
      Commits specified appear to be from a different repo than the repo
      in the current directory.
        Current Repo:  %s
        Expected Repo: %s

      Please re-run this script in an empty working directory and we'll fetch
      the correct repo.""", actual_url, correct_url)

    # If this checkout is itself backed by a git_cache mirror, refresh it so
    # the fetches below are cheap.
    mirror = git_cache.Mirror.from_repo('.')
    if mirror:
      mirror.populate(bootstrap=True, verbose=True)
      mirror.populate(fetch_specs=fetch_specs)

  elif len(os.listdir('.')) == 0:
    sample_path = '/path/to/cache'
    if sys.platform.startswith('win'):
      sample_path = r'X:\path\to\cache'
    if not git.config('cache.cachepath'):
      die("""\
      Automatic drover checkouts require that you configure your global
      cachepath to make these automatic checkouts as fast as possible. Do this
      by running:
        git config --global cache.cachepath "%s"

      We recommend picking a non-network-mounted path with a decent amount of
      space (at least 4GB).""" % sample_path)

    correct_url = get_correct_url(commits)

    mirror = git_cache.Mirror(correct_url)
    mirror.populate(bootstrap=True, verbose=True)
    mirror.populate(fetch_specs=fetch_specs)
    # Shared, checkout-less clone from the local mirror; drop the default
    # master branch since we only work on __drover_* branches.
    git.run('clone', '-s', '--no-checkout', mirror.mirror_path, '.')
    git.run('update-ref', '-d', 'refs/heads/master')
  else:
    die('You must either invoke this from a git repo, or from an empty dir.')

  for spec in [target_ref.local_full_ref] + commits:
    git.check('fetch', 'origin', spec)

  return correct_url
+ |
+ |
def find_hash_urls(commits, presumed_url=None):
  """Determine which known repo URL each commit hash belongs to.

  Probes gitiles (via GET) for each commit against the candidate repos, in
  parallel via thread pools.

  Args:
    commits: list of commit hashes to locate.
    presumed_url: repo URL to try first for all commits, if already known.

  Returns:
    dict mapping repo url (or 'UNKNOWN') -> list of commit hashes.
  """
  pool = ThreadPool()

  def _drain(asr, results):
    """Poll AsyncResults in |asr| (commit -> iterable of attempts).

    Mutates |results| (url -> set(commits)) with each found commit; gives
    each pending attempt up to ~10 passes of 0.5s to finish.

    Returns the list of commits for which every attempt came back MISSING
    (or None if an unexpected error occurred).
    """
    try:
      lost_commits = []
      passes = 0
      while asr and passes <= 10:
        still_pending = {}
        for commit, attempts in asr.iteritems():
          pending = []
          for attempt in attempts:
            try:
              outcome = attempt.get(0.5)
              if outcome is not MISSING:
                results[outcome].add(commit)
                break
            except multiprocessing.TimeoutError:
              pending.append(attempt)
          else:
            # No attempt located this commit (yet).
            if pending:
              still_pending[commit] = pending
            else:
              lost_commits.append(commit)
        asr = still_pending
        passes += 1
      return lost_commits
    except Exception:
      # Best-effort diagnostics: we'd rather print a traceback than let a
      # worker failure vanish silently.
      import traceback
      traceback.print_exc()

  # TODO(iannucci): Gather a summary from each commit
  def _exists(url, commit):
    """Return |url| if |commit| exists in that repo, else MISSING."""
    query_url = '%s/+/%s?format=JSON' % (url, commit)
    return MISSING if GET(query_url) is MISSING else url

  def _go_fish(commit, except_for=()):
    """Probe every known repo (minus |except_for|) for |commit|.

    Returns the matching repo url, or None if the commit is found nowhere.
    """
    async_results = {commit: set()}
    for host, repos in OK_REPOS.iteritems():
      for repo in repos:
        url = repo_url(host, repo)
        if url in except_for:
          continue
        async_results[commit].add(
            pool.apply_async(_exists, args=(url, commit)))

    found = collections.defaultdict(set)
    lost = _drain(async_results, found)
    if not lost:
      return found.popitem()[0]

  # Map of url -> set(commits).
  results = collections.defaultdict(set)

  # Find one hash which matches some repo, to establish a guess for the rest.
  while commits and not presumed_url:
    presumed_url = _go_fish(commits[0])
    results[presumed_url].add(commits[0])
    commits = commits[1:]

  # Check all remaining commits against the presumed repo.
  async_results = collections.defaultdict(list)
  for commit in commits:
    async_results[commit].append(
        pool.apply_async(_exists, args=(presumed_url, commit)))

  lost = _drain(async_results, results)

  if lost:
    # Commits not in the presumed repo: go fishing everywhere else.
    fishing_pool = ThreadPool()
    async_results = collections.defaultdict(list)
    for commit in lost:
      # BUG FIX: except_for must be a container of urls; passing the bare
      # string made `url in except_for` a substring test.
      async_results[commit].append(
          fishing_pool.apply_async(_go_fish, (commit,),
                                   {'except_for': (presumed_url,)}))
    lost = _drain(async_results, results)
    if lost:
      results[None].update(lost)

  return {(k or 'UNKNOWN'): list(v) for k, v in results.iteritems()}
+ |
+ |
def GET(url, **kwargs):
  """Fetch |url| over HTTPS with cert validation and retries.

  Returns the urllib2 response object, or MISSING on any 4xx error.
  Other HTTP errors (and persistent URL errors) propagate.
  """
  kwargs.setdefault('timeout', 5)
  request = fancy_urllib.FancyRequest(url)
  request.set_ssl_info(ca_certs=CA_CERTS_FILE)
  try:
    return retry(urllib2.urlopen, [request], kwargs,
                 on=urllib2.URLError, but_not=urllib2.HTTPError, upto=3)
  except urllib2.HTTPError as e:
    if e.getcode() / 100 == 4:
      return MISSING
    raise
+ |
+ |
def get_correct_url(commits, presumed_url=None):
  """Determine the single known repo all |commits| come from, or die.

  Args:
    commits: list of commit hashes.
    presumed_url: repo URL to verify against first (e.g. current origin).

  Returns:
    The repo URL. Dies if commits span multiple repos or can't be located.
  """
  unverified = commits
  if presumed_url:
    # If every commit already verifies locally (possibly after a cached
    # fetch), the presumed url is good enough -- no network probing needed.
    unverified = [c for c in unverified if not git.verify_commit(c)]
    if not unverified:
      return presumed_url
    git.cached_fetch(unverified)
    unverified = [c for c in unverified if not git.verify_commit(c)]
    if not unverified:
      return presumed_url

  url_hashes = find_hash_urls(unverified, presumed_url)
  # BUG FIX: find_hash_urls keys its unlocatable bucket as 'UNKNOWN', not
  # None, so the previous `None in url_hashes` check could never fire.
  if 'UNKNOWN' in url_hashes:
    die("""\
    Could not determine what repo the following commits originate from:
    %r""", url_hashes['UNKNOWN'])

  if len(url_hashes) > 1:
    die("""\
    Ambiguous commits specified. You supplied multiple commits, but they
    appear to be from more than one repo?
    %s""", pprint.pformat(dict(url_hashes)))

  return url_hashes.popitem()[0]
+ |
+ |
def is_ok_repo(url):
  """Return True iff |url| refers to one of the repos in OK_REPOS.

  Accepts https://<host>.googlesource.com/<repo>[.git] and sso://<host>/<repo>
  forms.
  """
  parsed = urlparse.urlsplit(url)

  if parsed.scheme == 'https':
    # Exactly which known host does this netloc belong to (if any)?
    matching = [h for h in OK_REPOS if (OK_HOST_FMT % h) == parsed.netloc]
    if not matching:
      return False
    host = matching[0]
  elif parsed.scheme == 'sso':
    host = parsed.netloc
    if host not in OK_REPOS:
      return False
  else:
    return False

  path = parsed.path.strip('/')
  if path.endswith('.git'):
    path = path[:-4]

  return path in OK_REPOS[host]
+ |
+ |
class NumberedBranch(collections.namedtuple('NumberedBranch', 'num')):
  """A release branch identified by its branch-heads number."""
  # pylint: disable=W0232

  @property
  def remote_full_ref(self):
    """The fully-qualified ref name as it exists on the remote."""
    return 'refs/branch-heads/%d' % self.num

  @property
  def local_full_ref(self):
    """The fully-qualified local tracking ref name for this branch."""
    return 'refs/origin/branch-heads/%d' % self.num
+ |
+ |
# A release milestone (e.g. M33); resolve_ref() maps it to a NumberedBranch
# via omahaproxy.
ReleaseBranch = collections.namedtuple('ReleaseBranch', 'num')


# An (os, channel) pair (e.g. ('win', 'beta')); resolve_ref() maps it to a
# NumberedBranch via omahaproxy.
Channel = collections.namedtuple('Channel', 'os channel')
+ |
+ |
def resolve_ref(ref):
  """Resolve a branch specifier to a concrete NumberedBranch.

  Args:
    ref: a NumberedBranch (returned as-is), a Channel, or a ReleaseBranch
        (the latter two consult omahaproxy).

  Dies if the ref is unrecognized or cannot be resolved unambiguously.
  """
  def _omaha_data():
    """Fetch and reshape omahaproxy's all.json.

    Returns {os: {channel: {'major': int, 'branch': NumberedBranch}}}.
    NOTE(review): assumes GET succeeds here (a MISSING return would crash
    json.load) -- confirm acceptable.
    """
    import json
    raw = json.load(GET('http://omahaproxy.appspot.com/all.json'))
    ret = {}
    for os_blob in raw:
      channels = ret[os_blob['os']] = {}
      for vers in os_blob['versions']:
        full_version = map(int, vers['version'].split('.'))
        branch_num = full_version[2]

        # 'true_branch' may be absent, None, or empty; it can also carry a
        # '_'-separated suffix which we discard.
        true_branch = (vers.get('true_branch') or '').split('_', 1)[0]
        if true_branch and true_branch.isdigit():
          branch_num = int(true_branch)
        channels[vers['channel']] = {'major': full_version[0],
                                     'branch': NumberedBranch(branch_num)}
    return ret

  if isinstance(ref, NumberedBranch):
    return ref

  if isinstance(ref, Channel):
    data = _omaha_data()
    if ref.os not in data:
      die('Unrecognized Channel: %r', ref)

    branch = data[ref.os].get(ref.channel, {}).get('branch')
    if branch:
      return branch

    die("No channel %s for os %s found." % (ref.channel, ref.os))

  if isinstance(ref, ReleaseBranch):
    data = _omaha_data()
    candidates = set()
    for channel_map in data.itervalues():
      for channel, vers in channel_map.iteritems():
        if channel == 'canary':
          continue  # not a trustworthy source of information.
        if vers['major'] == ref.num:
          candidates.add(vers['branch'])

    if len(candidates) > 1:
      die('Ambiguous release branch m%s: %r', ref.num, candidates)
    if not candidates:
      die("Couldn't find branch for m%s", ref.num)

    return candidates.pop()

  die('Unrecognized ref type: %r', ref)
+ |
+ |
def parse_opts():
  """Parse the command line.

  Returns an argparse.Namespace with: commits (list of hashes), bugs,
  prep_only, action ('merge' or 'revert'), and ref (a resolved
  NumberedBranch).
  """
  epilog = textwrap.dedent("""\
      REF in the above may take the form of:
        DDDD - a numbered branch (i.e. refs/branch-heads/DDDD)
        mDD - a release milestone (will consult omahaproxy, aborts if ambiguous)
        os,channel - consults omahaproxy for the current branch-head
          os = android, ios, cros, cf, linux, mac, win, ...
          channel = canary, dev, beta, stable, ...
      """)

  commit_re = re.compile('^[0-9a-fA-F]{40}$')

  def _commit_type(s):
    """argparse type-checker: a full 40-hex-digit commit hash."""
    if not commit_re.match(s):
      raise argparse.ArgumentTypeError("%r is not a valid commit hash" % s)
    return s

  def _ref_type(s):
    """argparse type-checker: parse REF per the epilog grammar."""
    if not s:
      raise argparse.ArgumentTypeError("Empty ref: %r" % s)
    if ',' in s:
      bits = s.split(',')
      if len(bits) != 2:
        raise argparse.ArgumentTypeError("Invalid Channel ref: %r" % s)
      return Channel(*bits)
    if s[0] in 'mM':
      if not s[1:].isdigit():
        raise argparse.ArgumentTypeError("Invalid ReleaseBranch ref: %r" % s)
      return ReleaseBranch(int(s[1:]))
    if s.isdigit():
      return NumberedBranch(int(s))
    raise argparse.ArgumentTypeError("Invalid ref: %r" % s)

  parser = argparse.ArgumentParser(
      description=__doc__, epilog=epilog,
      formatter_class=argparse.RawDescriptionHelpFormatter)

  parser.add_argument('commit', nargs=1, metavar='HASH',
                      type=_commit_type, help='commit hash to revert/merge')
  parser.add_argument('--prep_only', action='store_true', default=False,
                      help=('Prep and upload the CL (without sending mail) '
                            'but don\'t push.'))
  parser.add_argument('--bug', metavar='NUM', action='append', dest='bugs',
                      help='optional bug number(s)')

  grp = parser.add_mutually_exclusive_group(required=True)
  grp.add_argument('--merge_to', metavar='REF', type=_ref_type,
                   help='branch to merge to')
  grp.add_argument('--revert_from', metavar='REF', type=_ref_type,
                   help='branch ref to revert from')
  opts = parser.parse_args()

  # TODO(iannucci): Support multiple commits
  opts.commits = opts.commit
  del opts.commit

  if opts.merge_to:
    opts.action = 'merge'
    opts.ref = resolve_ref(opts.merge_to)
  elif opts.revert_from:
    opts.action = 'revert'
    opts.ref = resolve_ref(opts.revert_from)
  else:
    # Should be unreachable: the required mutually-exclusive group means
    # argparse already rejected this case.
    parser.error("?confusion? must specify either revert_from or merge_to")

  del opts.merge_to
  del opts.revert_from

  return opts
+ |
+ |
def main():
  """Drive the whole merge/revert flow; returns the process exit code."""
  opts = parse_opts()

  announce('Preparing working directory')

  correct_url = ensure_working_directory(opts.commits, opts.ref)
  summarize_job(correct_url, opts.commits, opts.ref, opts.action)
  confirm()

  announce('Checking out branches to %s changes' % opts.action)

  # Set up __drover_base tracking the target branch, then __drover_change on
  # top of it to hold the cherry-picks/reverts.
  git.run('fetch', 'origin',
          '%s:%s' % (opts.ref.remote_full_ref, opts.ref.local_full_ref))
  git.check('update-ref', '-d', 'refs/heads/__drover_base')
  git.run('checkout', '-b', '__drover_base', opts.ref.local_full_ref,
          stdout=None, stderr=None)
  git.run('config', 'branch.__drover_base.remote', 'origin')
  git.run('config', 'branch.__drover_base.merge', opts.ref.remote_full_ref)
  git.check('branch', '-D', '__drover_change')
  git.run('checkout', '-t', '__drover_base', '-b', '__drover_change',
          stdout=None, stderr=None)

  announce('Performing %s' % opts.action)

  # TODO(iannucci): support --signoff ?
  authors = []
  for commit in opts.commits:
    success = False
    if opts.action == 'merge':
      success = git.check('cherry-pick', '-x', commit, verbose=True,
                          stdout=None, stderr=None)
    else:  # revert
      success = git.check('revert', '--no-edit', commit, verbose=True,
                          stdout=None, stderr=None)
    if not success:
      die("""\
      Aborting. Failed to %s.
      """ % opts.action)

    email = git.run('show', '--format=%ae', '-s')
    # git-svn email addresses take the form of:
    #   user@domain.com@<svn id>
    authors.append('@'.join(email.split('@', 2)[:2]))

  announce('Success! Uploading to codereview.chromium.org')

  if opts.prep_only:
    print('Prep only mode, uploading CL but not sending mail.')
    mail = []
  else:
    mail = ['--send-mail', '--reviewers=' + ','.join(authors)]

  # BUG FIX: this list literal was unterminated (missing ']'), and |mail| was
  # computed above but never used -- it clearly belongs on the upload command.
  args = [
      '-c', 'gitcl.remotebranch=__drover_base',
      '-c', 'branch.__drover_change.base-url=%s' % correct_url,
      'cl', 'upload', '--bypass-hooks',
  ] + mail
  # TODO(iannucci): option to not bypass hooks?
  git.check(*args, stdout=None, stderr=None, stdin=None)

  if opts.prep_only:
    announce('Issue created. To push to the branch, run `git cl push`')
  else:
    announce('About to push! This will make the commit live.')
    confirm(abort=('Issue has been created, '
                   'but change was not pushed to the repo.'))
    # NOTE(review): the actual push invocation is missing here -- after
    # confirmation, nothing is pushed to the branch. Needs `git cl push`
    # (or equivalent) before this tool is complete.

  return 0
+ |
+ |
# Script entry point: exit with main()'s return code.
if __name__ == '__main__':
  sys.exit(main())