Chromium Code Reviews| Index: git_number.py | 
| diff --git a/git_number.py b/git_number.py | 
| new file mode 100755 | 
| index 0000000000000000000000000000000000000000..602055b0ebc78dcdd910bfcf5112589a2750b65f | 
| --- /dev/null | 
| +++ b/git_number.py | 
| @@ -0,0 +1,259 @@ | 
| +#!/usr/bin/env python | 
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved. | 
| +# Use of this source code is governed by a BSD-style license that can be | 
| +# found in the LICENSE file. | 
| + | 
| +"""Usage: %prog [options] [<commitref>]* | 
| + | 
| +If no <commitref>'s are supplied, it defaults to HEAD. | 
| + | 
| +Calculates the generation number for one or more commits in a git repo. | 
| + | 
| +Generation number of a commit C with parents P is defined as: | 
| + generation_number(C, []) = 0 | 
| + generation_number(C, P) = max(map(generation_number, P)) + 1 | 
| + | 
| +This number can be used to order commits relative to each other, as long as for | 
| +any pair of the commits, one is an ancestor of the other. | 
| + | 
| +Since calculating the generation number of a commit requires walking that | 
| +commit's entire history, this script caches all calculated data inside the git | 
| +repo that it operates on in the ref 'refs/number/commits'. | 
| +""" | 
| + | 
| +import binascii | 
| +import collections | 
| +import logging | 
| +import optparse | 
| +import os | 
| +import struct | 
| +import subprocess2 | 
| +import sys | 
| +import tempfile | 
| + | 
| +import git_common as git | 
| + | 
| +CHUNK_FMT = '!20sL' | 
| +CHUNK_SIZE = struct.calcsize(CHUNK_FMT) | 
| +DIRTY_TREES = collections.defaultdict(int) | 
| +REF = 'refs/number/commits' | 
| + | 
| +# Number of bytes to use for the prefix on our internal number structure. | 
| +# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would | 
| +# need to reimplement cache data structures to be a bit more sophisticated than | 
| +# dicts. 1 seems to be just right. | 
| +PREFIX_LEN = 1 | 
| + | 
| +# Set this to 'threads' to gather coverage data while testing. | 
| +POOL_KIND = 'procs' | 
| + | 
| + | 
| +def pathlify(hash_prefix): | 
| + """Converts a binary object hash prefix into a posix path, one folder per | 
| + byte. | 
| + | 
| + >>> pathlify('\xDE\xAD') | 
| + 'de/ad' | 
| + """ | 
| + return '/'.join('%02x' % ord(b) for b in hash_prefix) | 
| + | 
| + | 
| +@git.memoize_one(threadsafe=False) | 
| +def get_number_tree(prefix_bytes): | 
| + """Returns a dictionary of the git-number registry specified by | 
| + |prefix_bytes|. | 
| + | 
| + This is in the form of {<full binary ref>: <gen num> ...} | 
| + | 
| + >>> get_number_tree('\x83\xb4') | 
| + {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} | 
| + """ | 
| + ref = '%s:%s' % (REF, pathlify(prefix_bytes)) | 
| + | 
| + try: | 
| + raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False)) | 
| + return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) | 
| + for i in xrange(len(raw) / CHUNK_SIZE)) | 
| + except subprocess2.CalledProcessError: | 
| + return {} | 
| + | 
| + | 
| +@git.memoize_one(threadsafe=False) | 
| +def get_num(commit_hash): | 
| + """Takes a commit hash and returns the generation number for it or None if | 
| + the commit hash doesn't have a computed value yet.""" | 
| + return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash) | 
| + | 
| + | 
| +def clear_caches(on_disk=False): | 
| + """Clears in-process caches for e.g. unit testing.""" | 
| + get_number_tree.clear() | 
| + get_num.clear() | 
| + if on_disk: | 
| + git.run('update-ref', '-d', REF) | 
| + | 
| + | 
| +def intern_number_tree(tree): | 
| + """Transforms a number tree (in the form returned by |get_number_tree|) into | 
| + a git blob. | 
| + | 
| + Returns the git blob id as hex-encoded string. | 
| + | 
| + >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} | 
| + >>> intern_number_tree(d) | 
| + 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' | 
| + """ | 
| + with tempfile.TemporaryFile() as f: | 
| + for k, v in sorted(tree.iteritems()): | 
| + f.write(struct.pack(CHUNK_FMT, k, v)) | 
| + f.seek(0) | 
| + return git.intern_f(f) | 
| 
 
M-A Ruel
2013/11/14 17:27:24
I don't think it'll work on Windows. This is becau
 
iannucci
2013/11/15 02:12:58
Totally works :)
 
 | 
| + | 
| + | 
| +def leaf_map_fn((pre, tree)): | 
| + """Converts a prefix and number tree into a git index line.""" | 
| + return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) | 
| + | 
| + | 
| +def finalize(targets): | 
| + """Saves all cache data to the git repository. | 
| + | 
| + After calculating the generation number for |targets|, call finalize() to | 
| + save all the work to the git repository. | 
| + | 
| + This in particular saves the trees referred to by DIRTY_TREES. | 
| + """ | 
| + if not DIRTY_TREES: | 
| + return | 
| + | 
| + msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) | 
| + | 
| + idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx') | 
| + env = os.environ.copy() | 
| + env['GIT_INDEX_FILE'] = idx | 
| + | 
| + progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES) | 
| + with git.ProgressPrinter(progress_message) as inc: | 
| + git.run('read-tree', REF, env=env) | 
| + | 
| + prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) | 
| + updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'], | 
| + stdin=subprocess2.PIPE, env=env) | 
| + | 
| + with git.ScopedPool(kind=POOL_KIND) as leaf_pool: | 
| + for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): | 
| + updater.stdin.write(item) | 
| + inc() | 
| + | 
| + updater.stdin.close() | 
| + updater.wait() | 
| + assert updater.returncode == 0 | 
| + | 
| + tree_id = git.run('write-tree', env=env) | 
| + commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF) | 
| + for t in targets: | 
| + commit_cmd += ['-p', binascii.hexlify(t)] | 
| + commit_cmd.append(tree_id) | 
| + commit_hash = git.run(*commit_cmd) | 
| + git.run('update-ref', REF, commit_hash) | 
| + DIRTY_TREES.clear() | 
| + | 
| + | 
| +def preload_tree(prefix): | 
| + """Returns the prefix and parsed tree object for the specified prefix.""" | 
| + return prefix, get_number_tree(prefix) | 
| + | 
| + | 
| +def all_prefixes(depth=PREFIX_LEN): | 
| + for x in (chr(i) for i in xrange(255)): | 
| + # This isn't covered because PREFIX_LEN currently == 1 | 
| + if depth > 1: # pragma: no cover | 
| + for r in all_prefixes(depth - 1): | 
| + yield x + r | 
| + else: | 
| + yield x | 
| + | 
| + | 
| +def load_generation_numbers(targets): | 
| + """Populates the caches of get_num and get_number_tree so they contain | 
| + the results for |targets|. | 
| + | 
| + Loads cached numbers from disk, and calculates missing numbers if one or | 
| + more of |targets| is newer than the cached calculations. | 
| + | 
| + Args: | 
| + targets - An iterable of binary-encoded full git commit hashes. | 
| + """ | 
| + if all(get_num(t) is not None for t in targets): | 
| 
 
M-A Ruel
2013/11/14 17:27:24
This is somewhat dangerous. If someone uses a gene
 
iannucci
2013/11/15 02:12:58
hm, good point. I'll list-ify it.
 
 | 
| + return | 
| + | 
| + if git.tree(REF) is None: | 
| + empty = git.mktree({}) | 
| + commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number', | 
| + empty) | 
| + git.run('update-ref', REF, commit_hash) | 
| + | 
| + with git.ScopedPool(kind=POOL_KIND) as pool: | 
| + preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) | 
| + | 
| + rev_list = [] | 
| + | 
| + with git.ProgressPrinter('Loading commits: %(count)d') as inc: | 
| + # Curiously, buffering the list into memory seems to be the fastest | 
| + # approach in python (as opposed to iterating over the lines in the | 
| + # stdout as they're produced). GIL strikes again :/ | 
| + cmd = [ | 
| + 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF | 
| + ] + map(binascii.hexlify, targets) | 
| + for line in git.run(*cmd).splitlines(): | 
| + tokens = map(binascii.unhexlify, line.split()) | 
| + rev_list.append((tokens[0], tokens[1:])) | 
| + inc() | 
| + | 
| + get_number_tree.update(preload_iter) | 
| + | 
| + with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc: | 
| + for commit_hash, pars in rev_list: | 
| + num = max(map(get_num, pars)) + 1 if pars else 0 | 
| + | 
| + prefix = commit_hash[:PREFIX_LEN] | 
| + get_number_tree(prefix)[commit_hash] = num | 
| + DIRTY_TREES[prefix] += 1 | 
| + get_num.set(commit_hash, num) | 
| + | 
| + inc() | 
| + | 
| + | 
| +def main(): # pragma: no cover | 
| + parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__) | 
| + parser.add_option('--no-cache', action='store_true', | 
| + help='Do not actually cache anything we calculate.') | 
| + parser.add_option('--reset', action='store_true', | 
| + help='Reset the generation number cache and quit.') | 
| + parser.add_option('-v', '--verbose', action='count', default=0, | 
| + help='Be verbose. Use more times for more verbosity.') | 
| + opts, args = parser.parse_args() | 
| + | 
| + if opts.verbose == 1: | 
| + logging.getLogger().setLevel(logging.INFO) | 
| + elif opts.verbose >= 2: | 
| + logging.getLogger().setLevel(logging.DEBUG) | 
| + | 
| + try: | 
| + if opts.reset: | 
| + clear_caches(on_disk=True) | 
| + return | 
| + | 
| + targets = git.parse_commitrefs(*(args or ['HEAD'])) | 
| + load_generation_numbers(targets) | 
| + if not opts.no_cache: | 
| + finalize(targets) | 
| + | 
| + print '\n'.join(str(get_num(x)) for x in targets) | 
| + return 0 | 
| + except KeyboardInterrupt: | 
| + return 1 | 
| + | 
| + | 
| +if __name__ == '__main__': # pragma: no cover | 
| + sys.exit(main()) |