Chromium Code Reviews| Index: git_common.py |
| diff --git a/git_common.py b/git_common.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..84f372d679a5a820fb31effacb1441888b4a83e8 |
| --- /dev/null |
| +++ b/git_common.py |
| @@ -0,0 +1,332 @@ |
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +# Monkeypatch IMapIterator so that Ctrl-C can kill everything properly. |
| +# Derived from https://gist.github.com/aljungberg/626518 |
| +import multiprocessing.pool |
| +from multiprocessing.pool import IMapIterator |
| +def wrapper(func): |
| + def wrap(self, timeout=None): |
| + return func(self, timeout=timeout or 1e100) |
| + return wrap |
| +IMapIterator.next = wrapper(IMapIterator.next) |
| +IMapIterator.__next__ = IMapIterator.next |
| + |
| + |
| +import binascii |
| +import contextlib |
| +import functools |
| +import logging |
| +import signal |
| +import subprocess |
| +import sys |
| +import tempfile |
| +import threading |
| + |
| + |
| +GIT_EXE = 'git.bat' if sys.platform.startswith('win') else 'git' |
| + |
| + |
| +class CalledProcessError(Exception): |
| + def __init__(self, returncode, cmd): |
| + super(CalledProcessError, self).__init__() |
| + self.returncode = returncode |
| + self.cmd = cmd |
| + |
| + def __str__(self): |
| + return ( |
| + 'Command "%s" returned non-zero exit status %d' % |
| + (self.cmd, self.returncode)) |
| + |
| + |
| +def memoize_one(**kwargs): |
| + """Memoizes a single-argument pure function. |
| + |
| + Values of None are not cached. |
| + |
| + Kwargs: |
| + threadsafe (bool) - REQUIRED. Specifies whether to use locking around |
| + cache manipulation functions. This is a kwarg so that users of memoize_one |
| + are forced to explicitly and verbosely pick True or False. |
| + |
| + Adds three methods to the decorated function: |
| + * get(key, default=None) - Gets the value for this key from the cache. |
| + * set(key, value) - Sets the value for this key from the cache. |
| + * clear() - Drops the entire contents of the cache. Useful for unittests. |
| + * update(other) - Updates the contents of the cache from another dict. |
| + """ |
| + assert 'threadsafe' in kwargs, "Must specify threadsafe={True,False}" |
| + threadsafe = kwargs['threadsafe'] |
| + |
| + if threadsafe: |
| + def withlock(lock, f): |
| + def inner(*args, **kwargs): |
| + with lock: |
| + return f(*args, **kwargs) |
| + return inner |
| + else: |
| + def withlock(_lock, f): |
| + return f |
| + |
| + def decorator(f): |
| + # Instantiate the lock in decorator, in case users of memoize_one do: |
| + # |
| + # memoizer = memoize_one(threadsafe=True) |
| + # |
| + # @memoizer |
| + # def fn1(val): ... |
| + # |
| + # @memoizer |
| + # def fn2(val): ... |
| + |
| + lock = threading.Lock() if threadsafe else None |
| + cache = {} |
| + _get = withlock(lock, cache.get) |
| + _set = withlock(lock, cache.__setitem__) |
| + |
| + @functools.wraps(f) |
| + def inner(arg): |
| + ret = _get(arg) |
| + if ret is None: |
| + ret = f(arg) |
| + if ret is not None: |
| + _set(arg, ret) |
| + return ret |
| + inner.get = _get |
| + inner.set = _set |
| + inner.clear = withlock(lock, cache.clear) |
| + inner.update = withlock(lock, cache.update) |
| + return inner |
| + return decorator |
| + |
| + |
| +def _ScopedPool_initer(orig, orig_args): # pragma: no cover |
| + """Initializer method for ScopedPool's subprocesses. |
| + |
| + This helps ScopedPool handle Ctrl-C's correctly. |
| + """ |
| + signal.signal(signal.SIGINT, signal.SIG_IGN) |
| + if orig: |
| + orig(*orig_args) |
| + |
| + |
| +@contextlib.contextmanager |
| +def ScopedPool(*args, **kwargs): |
| + """Context Manager which returns a multiprocessing.pool instance which |
| + correctly deals with thrown exceptions. |
| + |
| + *args - Arguments to multiprocessing.pool |
| + |
| + Kwargs: |
| + kind ('threads', 'procs') - The type of underlying coprocess to use. |
| + **etc - Arguments to multiprocessing.pool |
| + """ |
| + if kwargs.pop('kind', None) == 'threads': |
| + pool = multiprocessing.pool.ThreadPool(*args, **kwargs) |
| + else: |
| + orig, orig_args = kwargs.get('initializer'), kwargs.get('initargs', ()) |
| + kwargs['initializer'] = _ScopedPool_initer |
| + kwargs['initargs'] = orig, orig_args |
| + pool = multiprocessing.pool.Pool(*args, **kwargs) |
| + |
| + try: |
| + yield pool |
| + pool.close() |
| + except: |
| + pool.terminate() |
| + raise |
| + finally: |
| + pool.join() |
| + |
| + |
| +class ProgressPrinter(object): |
|
M-A Ruel
2013/11/13 19:55:00
FTR, I prefer https://code.google.com/p/swarming/s
iannucci
2013/11/14 07:06:29
Nice :D.
Yeah we should have a standard library o
|
| + """Threaded single-stat status message printer.""" |
| + def __init__(self, fmt, enabled=None, stream=sys.stderr, period=0.5): |
| + """Create a ProgressPrinter. |
| + |
| + Use it as a context manager which produces a simple 'increment' method: |
| + |
| + with ProgressPrinter('(%%(count)d/%d)' % 1000) as inc: |
| + for i in xrange(1000): |
| + # do stuff |
| + if i % 10 == 0: |
| + inc(10) |
| + |
| + Args: |
| + fmt - String format with a single '%(count)d' where the counter value |
| + should go. |
| + enabled (bool) - If this is None, will default to True if |
| + logging.getLogger() is set to INFO or more verbose. |
| + stream (file-like) - The stream to print status messages to. |
| + period (float) - The time in seconds for the printer thread to wait |
| + between printing. |
| + """ |
| + self.fmt = fmt |
| + if enabled is None: # pragma: no cover |
| + self.enabled = logging.getLogger().isEnabledFor(logging.INFO) |
| + else: |
| + self.enabled = enabled |
| + |
| + self._count = 0 |
| + self._dead = False |
| + self._dead_cond = threading.Condition() |
| + self._stream = stream |
| + self._thread = threading.Thread(target=self._run) |
| + self._period = period |
| + |
| + def _emit(self, s): |
| + if self.enabled: |
| + self._stream.write('\r'+s) |
|
M-A Ruel
2013/11/13 19:55:00
self._stream.write('\r' + s)
iannucci
2013/11/14 07:06:29
d'oh. Why doesn't our presubmit barf on this?
M-A Ruel
2013/11/14 17:27:24
No idea.
|
| + self._stream.flush() |
| + |
| + def _run(self): |
| + with self._dead_cond: |
| + while not self._dead: |
| + self._emit(self.fmt % {'count': self._count}) |
| + self._dead_cond.wait(self._period) |
| + self._emit((self.fmt+'\n') % {'count': self._count}) |
| + |
| + def inc(self, amount=1): |
| + self._count += amount |
| + |
| + def __enter__(self): |
| + self._thread.start() |
| + return self.inc |
| + |
| + def __exit__(self, _exc_type, _exc_value, _traceback): |
| + self._dead = True |
| + with self._dead_cond: |
| + self._dead_cond.notifyAll() |
| + self._thread.join() |
| + del self._thread |
| + |
| + |
| +def parse_commitrefs(*commitrefs): |
| + """Returns binary encoded commit hashes for one or more commitrefs. |
| + |
| + A commitref is anything which can resolve to a commit. Popular examples: |
| + * "HEAD" |
| + * "origin/master" |
| + * "cool_branch~2" |
| + """ |
| + try: |
| + return map(binascii.unhexlify, hashes(*commitrefs)) |
| + except CalledProcessError: |
| + raise Exception('one of %s does not seem to be a valid commitref.' % |
| + str(commitrefs)) |
|
M-A Ruel
2013/11/13 19:55:00
str() is not needed by definition.
iannucci
2013/11/14 07:06:29
Well... it is because commitrefs is a tuple. I don
|
| + |
| + |
| +def _check_output(*popenargs, **kwargs): |
|
M-A Ruel
2013/11/13 19:55:00
Why not subprocess2.check_output() ? It works on 2
iannucci
2013/11/14 07:06:29
Good question. I'll use that.
|
| + """Runs a Popen command, and return the stdout as a string. |
| + |
| + Throws CalledProcessError if the command returns non-zero. |
| + |
| + kwargs: |
| + indata (str) - Data to provide to the command on stdin. Mutually exclusive |
| + with the Popen kwarg 'stdin'. |
| + |
| + Other than that, popenargs is *args to Popen, and **kwargs is... **kwargs to |
| + Popen. |
| + """ |
| + kwargs.setdefault('stdout', subprocess.PIPE) |
| + kwargs.setdefault('stderr', subprocess.PIPE) |
| + indata = kwargs.pop('indata', None) |
| + if indata is not None: |
| + kwargs['stdin'] = subprocess.PIPE |
| + process = subprocess.Popen(*popenargs, **kwargs) |
| + output, _ = process.communicate(indata) |
| + if process.returncode: |
| + cmd = kwargs.get('args') |
| + if cmd is None: |
| + cmd = popenargs[0] |
| + raise CalledProcessError(process.returncode, cmd) |
| + return output |
| + |
| + |
| +def run(*cmd, **kwargs): |
| + """Runs a git command. Returns stdout as a string. |
| + |
| + If logging is DEBUG, we'll print the command before we run it. |
| + |
| + kwargs |
| + autostrip (bool) - Strip the output. Defaults to True. |
| + Output string is always strip()'d. |
| + """ |
| + autostrip = kwargs.pop('autostrip', True) |
| + cmd = (GIT_EXE,) + cmd |
| + logging.debug('running: %s', " ".join(repr(tok) for tok in cmd)) |
| + ret = _check_output(cmd, **kwargs) |
| + if autostrip: |
| + ret = (ret or '').strip() |
| + return ret |
| + |
| + |
| +def hashes(*reflike): |
| + return run('rev-parse', *reflike).splitlines() |
| + |
| + |
| +def intern_f(f, kind='blob'): |
| + """Interns a file object into the git object store. |
| + |
| + Args: |
| + f (file-like object) - The file-like object to intern |
| + kind (git object type) - One of 'blob', 'commit', 'tree', 'tag'. |
| + |
| + Returns the git hash of the interned object (hex encoded). |
| + """ |
| + ret = run('hash-object', '-t', kind, '-w', '--stdin', stdin=f) |
| + f.close() |
| + return ret |
| + |
| + |
| +def tree(treeref, recurse=False): |
| + """Returns a dict representation of a git tree object. |
| + |
| + Args: |
| + treeref (str) - a git ref which resolves to a tree (commits count as trees). |
| + recurse (bool) - include all of the tree's decendants too. File names will |
| + take the form of 'some/path/to/file'. |
| + |
| + Return format: |
| + { 'file_name': (mode, type, ref) } |
| + |
| + mode is an integer where: |
| + * 0040000 - Directory |
| + * 0100644 - Regular non-executable file |
| + * 0100664 - Regular non-executable group-writeable file |
| + * 0100755 - Regular executable file |
| + * 0120000 - Symbolic link |
| + * 0160000 - Gitlink |
| + |
| + type is a string where it's one of 'blob', 'commit', 'tree', 'tag'. |
| + |
| + ref is the hex encoded hash of the entry. |
| + """ |
| + ret = {} |
| + opts = ['ls-tree', '--full-tree'] |
| + if recurse: |
| + opts += ['-r'] |
| + opts.append(treeref) |
| + try: |
| + for line in run(*opts).splitlines(): |
| + mode, typ, ref, name = line.split(None, 3) |
| + ret[name] = (mode, typ, ref) |
| + except CalledProcessError: |
| + return None |
| + return ret |
| + |
| + |
| +def mktree(treedict): |
| + """Makes a git tree object and returns its hash. |
| + |
| + See |tree()| for the values of mode, type, and ref. |
| + |
| + Args: |
| + treedict - { name: (mode, type, ref) } |
| + """ |
| + with tempfile.TemporaryFile() as f: |
| + for name, (mode, typ, ref) in treedict.iteritems(): |
| + f.write('%s %s %s\t%s\0' % (mode, typ, ref, name)) |
| + f.seek(0) |
| + return run('mktree', '-z', stdin=f) |