Chromium Code Reviews| Index: git_cache.py |
| diff --git a/git_cache.py b/git_cache.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..e7279e84238d8d64a0139a4110ac14c140a3aaa5 |
| --- /dev/null |
| +++ b/git_cache.py |
| @@ -0,0 +1,248 @@ |
| +#!/usr/bin/env python |
| +# Copyright 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""A git command for managing a local cache of git repositories.""" |
| + |
| +import errno |
| +import logging |
| +import optparse |
| +import os |
| +import subprocess |
| +import sys |
| + |
| +import gclient_utils |
| +import subcommand |
| + |
| + |
def DieWithError(message):
    """Prints `message` to stderr and exits the process with status 1.

    The message is written with sys.stderr.write() rather than the
    Python 2-only `print >>` statement, so this is valid on Python 2 and 3.

    Args:
      message: text (or any object convertible with %s) to report.

    Raises:
      SystemExit: always, with exit code 1.
    """
    sys.stderr.write('%s\n' % (message,))
    sys.exit(1)
| + |
| + |
def UrlToCacheDir(url):
    """Maps a git url onto the flat directory name used for it in the cache.

    Everything up to and including the first '://' is dropped, a trailing
    '.git' is appended when missing, every '-' is doubled and every '/' is
    turned into a single '-'.
    """
    _scheme, separator, remainder = url.partition('://')
    normalized = remainder if separator else url
    if not normalized.endswith('.git'):
        normalized = normalized + '.git'
    # Double the dashes first so that they stay distinguishable from the
    # dashes introduced for path separators.
    escaped = normalized.replace('-', '--')
    return escaped.replace('/', '-')
| + |
| + |
def RunGit(cmd, **kwargs):
    """Runs git in a subprocess via gclient_utils.CheckCallAndFilter.

    Args:
      cmd: list of git arguments, without the leading 'git'.
      **kwargs: forwarded to gclient_utils.CheckCallAndFilter after these
          defaults are applied:
          - 'cwd' defaults to the current working directory.
          - With a truthy 'filter_fn': it is wrapped in
            gclient_utils.GitFilter, 'print_stdout' defaults to False, and
            GIT_ASKPASS / SSH_ASKPASS default to 'true' in the child
            environment.
          - Without one: 'print_stdout' defaults to True.
          - 'stdout' (default sys.stdout) receives a "running ..." line.
    """
    # Call os.getcwd(); storing the function object itself would end up both
    # in the log line below and in the subprocess 'cwd' argument.
    kwargs.setdefault('cwd', os.getcwd())

    filter_fn = kwargs.get('filter_fn')
    if filter_fn:
        kwargs['filter_fn'] = gclient_utils.GitFilter(filter_fn)
        kwargs.setdefault('print_stdout', False)
        # Work on a private copy so the caller's dict is never mutated, and
        # treat an explicit env=None like a missing one.
        env = kwargs.get('env')
        env = os.environ.copy() if env is None else dict(env)
        # NOTE(review): presumably this makes git fail instead of prompting
        # for credentials while its output is being filtered -- confirm.
        env.setdefault('GIT_ASKPASS', 'true')
        env.setdefault('SSH_ASKPASS', 'true')
        kwargs['env'] = env
    else:
        kwargs.setdefault('print_stdout', True)

    stdout = kwargs.get('stdout', sys.stdout)
    stdout.write(
        'running \'git %s\' in \'%s\'\n' % (' '.join(cmd), kwargs['cwd']))
    gclient_utils.CheckCallAndFilter(['git'] + cmd, **kwargs)
| + |
| + |
class LockError(Exception):
    """Raised when a Lockfile cannot be acquired or released."""
| + |
| + |
class Lockfile(object):
    """Class to represent a cross-platform process-specific lockfile.

    The lock for `path` is the file `path + ".lock"`; it contains the pid of
    the process that created it. Instances can be used as context managers:
    the lock is taken on entry and released on exit. Acquisition never
    waits: if the lockfile already exists, lock() raises LockError.
    """

    def __init__(self, path):
        self.path = os.path.abspath(path)
        self.lockfile = self.path + ".lock"
        self.pid = os.getpid()

    def _read_pid(self):
        """Reads the pid stored in the lockfile.

        Returns:
          The pid as an int, or None if the lockfile cannot be read or its
          first line is not an integer.
        """
        try:
            with open(self.lockfile, 'r') as f:
                return int(f.readline().strip())
        except (IOError, ValueError):
            return None

    def _make_lockfile(self):
        """Safely creates a lockfile containing the current pid.

        O_CREAT | O_EXCL makes os.open() fail with EEXIST when the file
        already exists, so creating the file is also the test for whether
        somebody else holds the lock.

        Raises:
          OSError: if the lockfile cannot be created.
        """
        fd = os.open(
            self.lockfile, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
        # The with-block closes the descriptor even if the write fails.
        with os.fdopen(fd, 'w') as f:
            f.write('%s\n' % self.pid)

    def _remove_lockfile(self):
        """Deletes the lockfile. Does nothing if it doesn't exist."""
        try:
            os.remove(self.lockfile)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise

    def lock(self):
        """Acquire the lock.

        Raises:
          LockError: if the lockfile already exists or cannot be created.
        """
        try:
            self._make_lockfile()
        except OSError as e:
            if e.errno == errno.EEXIST:
                raise LockError("%s is already locked" % self.path)
            # Keep the errno so the underlying cause is not lost.
            raise LockError(
                "Failed to create %s (err %s)" % (self.path, e.errno))

    def unlock(self):
        """Release the lock.

        Raises:
          LockError: if there is no lockfile, or if the pid recorded in it
            is not this process's pid.
        """
        if not self.is_locked():
            raise LockError("%s is not locked" % self.path)
        if not self.i_am_locking():
            raise LockError("%s is locked, but not by me" % self.path)
        self._remove_lockfile()

    def steal(self):
        """Break a lock and replace it with one of our own.

        Note that this is potentially racy (someone else could put a lockfile
        in place between breaking the lock and placing our own). This is
        fine, since we only want to use this method when it is completely
        safe.
        """
        old_pid = self._read_pid()
        if old_pid:
            # print(...) with one parenthesised argument behaves the same on
            # Python 2 and Python 3.
            print('Breaking lock on %s left behind by process %d' % (
                self.path, old_pid))
        self._remove_lockfile()
        self._make_lockfile()

    def is_locked(self):
        """Test if the file is locked by anyone.

        This only checks whether the lockfile exists right now; the answer
        can be stale by the time the caller acts on it.
        """
        return os.path.exists(self.lockfile)

    def i_am_locking(self):
        """Test if the file is locked by this process.

        True only when the lockfile exists and the pid stored in it equals
        this process's pid, so a lock that was stolen by another process is
        reported as not ours.
        """
        return self.is_locked() and self.pid == self._read_pid()

    def __enter__(self):
        self.lock()
        return self

    def __exit__(self, *_exc):
        self.unlock()
| + |
| + |
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
    """Checks to see if there already is a cache of the given repo."""
    # Returns 0 and prints the cache directory when the repo is cached,
    # returns 1 when it is not. A wrong number of arguments is reported
    # through parser.error(), which prints the usage and exits with the
    # option parser's own usage-error status.
    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error('git cache exists only takes exactly one repo url.')
    url = args[0]
    repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))
    # A cache entry counts as present when its directory contains a
    # 'config' file.
    flag_file = os.path.join(repo_dir, 'config')
    if os.path.isdir(repo_dir) and os.path.isfile(flag_file):
        # print(...) with one parenthesised argument behaves the same on
        # Python 2 and Python 3.
        print(repo_dir)
        return 0
    return 1
| + |
| + |
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
    """Bare clones or updates a repository in the cache."""
    # A wrong number of arguments is reported through parser.error(), which
    # prints the usage and exits with the option parser's own usage-error
    # status.
    parser.add_option('--local',
                      help='local repository to initialize from')
    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error('git cache populate only takes exactly one repo url.')
    url = args[0]

    gclient_utils.safe_makedirs(options.cache_dir)
    repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

    # If we've been supplied with a local repository to help out,
    # make sure that it is a full direct clone before relying on it.
    local_objects = local_altfile = ''
    if options.local:
        local_objects = os.path.join(
            os.path.abspath(options.local), '.git', 'objects')
        local_altfile = os.path.join(local_objects, 'info', 'alternates')
    # Without --local both paths are '', so use_reference is False.
    use_reference = (
        os.path.exists(local_objects) and not os.path.exists(local_altfile))
    altfile = os.path.join(repo_dir, 'objects', 'info', 'alternates')

    # Quiet mode drops '[up to date]' lines from git's output; verbose mode
    # passes -v to git and disables the filter.
    verbosity = []
    filter_fn = lambda l: '[up to date]' not in l
    if options.verbose:
        verbosity = ['-v']
        filter_fn = None

    # Lockfile does not wait: if another process holds the lock for this
    # repo, entering the block raises LockError.
    with Lockfile(repo_dir):
        # Do a full clone if the repo is new or is in a bad state.
        if not os.path.exists(os.path.join(repo_dir, 'config')):
            gclient_utils.rmtree(repo_dir)
            cmd = ['clone'] + verbosity + [
                '-c', 'core.deltaBaseCacheLimit=2g', '--progress', '--bare']

            if use_reference:
                cmd += ['--reference', os.path.abspath(options.local)]

            RunGit(cmd + [url, repo_dir],
                   filter_fn=filter_fn, cwd=options.cache_dir, retry=True)

        else:
            if use_reference:
                # Temporarily borrow objects from the local repository.
                with open(altfile, 'w') as f:
                    f.write(os.path.abspath(local_objects))

            RunGit(['fetch'] + verbosity + ['--multiple', '--progress', '--all'],
                   filter_fn=filter_fn, cwd=repo_dir, retry=True)

        # If the clone has an object dependency on the local repo, break it
        # with repack and remove the linkage.
        if os.path.exists(altfile):
            RunGit(['repack', '-a'], cwd=repo_dir)
            os.remove(altfile)
| + |
| + |
class OptionParser(optparse.OptionParser):
    """Wrapper class for OptionParser to handle global options.

    Adds -c/--cache-dir and -v/--verbose to every subcommand and resolves
    the cache directory in parse_args().
    """

    def __init__(self, *args, **kwargs):
        optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
        self.add_option('-c', '--cache-dir',
                        help='Path to the directory containing the cache.')
        self.add_option('-v', '--verbose', action='count', default=0,
                        help='Increase verbosity (can be passed multiple '
                             'times).')

    def parse_args(self, args=None, values=None):
        """Parses the arguments and resolves the cache directory.

        The cache directory comes from --cache-dir if given, otherwise from
        `git config --global cache.cachepath`. If neither yields a value the
        parser exits through self.error().

        Returns:
          (options, args) as optparse does, with options.cache_dir turned
          into an absolute path.
        """
        options, args = optparse.OptionParser.parse_args(self, args, values)

        # Configure logging before anything is logged: the module-level
        # logging functions implicitly call basicConfig() when the root
        # logger has no handlers, after which a later basicConfig(level=...)
        # is a no-op and the requested verbosity would be ignored.
        levels = [logging.WARNING, logging.INFO, logging.DEBUG]
        logging.basicConfig(
            level=levels[min(options.verbose, len(levels) - 1)])

        try:
            global_cache_dir = subprocess.check_output(
                ['git', 'config', '--global', 'cache.cachepath'],
                universal_newlines=True).strip()
        except (subprocess.CalledProcessError, OSError):
            # The key is not set (git exits non-zero) or git could not be
            # started at all; either way there is no global setting.
            global_cache_dir = ''

        if options.cache_dir:
            if global_cache_dir:
                logging.warning(
                    'Overriding globally-configured cache directory.')
        elif global_cache_dir:
            options.cache_dir = global_cache_dir
        else:
            self.error('No cache directory specified on command line '
                       'or in cache.cachepath.')
        options.cache_dir = os.path.abspath(options.cache_dir)

        return options, args
| + |
| + |
def main(argv):
    """Hands `argv` to a subcommand.CommandDispatcher built for this module.

    Returns whatever the dispatcher's execute() returns; the script entry
    point passes that value to sys.exit().
    """
    command_dispatcher = subcommand.CommandDispatcher(__name__)
    option_parser = OptionParser()
    return command_dispatcher.execute(option_parser, argv)
| + |
| + |
if __name__ == '__main__':
    # Run the command line (without the program name) and use main()'s
    # return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)