| Index: git_number.py
|
| diff --git a/git_number.py b/git_number.py
|
| deleted file mode 100755
|
| index 04f676c655d8db6c75c36a6205e1ef152365e8e3..0000000000000000000000000000000000000000
|
| --- a/git_number.py
|
| +++ /dev/null
|
| @@ -1,267 +0,0 @@
|
| -#!/usr/bin/env python
|
| -# Copyright 2013 The Chromium Authors. All rights reserved.
|
| -# Use of this source code is governed by a BSD-style license that can be
|
| -# found in the LICENSE file.
|
| -
|
| -"""Usage: %prog [options] [<commitref>]*
|
| -
|
| -If no <commitref>'s are supplied, it defaults to HEAD.
|
| -
|
| -Calculates the generation number for one or more commits in a git repo.
|
| -
|
| -Generation number of a commit C with parents P is defined as:
|
| - generation_number(C, []) = 0
|
| - generation_number(C, P) = max(map(generation_number, P)) + 1
|
| -
|
| -This number can be used to order commits relative to each other, as long as for
|
| -any pair of the commits, one is an ancestor of the other.
|
| -
|
| -Since calculating the generation number of a commit requires walking that
|
| -commit's entire history, this script caches all calculated data inside the git
|
| -repo that it operates on in the ref 'refs/number/commits'.
|
| -"""
|
| -
|
| -import binascii
|
| -import collections
|
| -import logging
|
| -import optparse
|
| -import os
|
| -import struct
|
| -import sys
|
| -import tempfile
|
| -
|
| -import git_common as git
|
| -import subprocess2
|
| -
|
| -CHUNK_FMT = '!20sL'
|
| -CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
|
| -DIRTY_TREES = collections.defaultdict(int)
|
| -REF = 'refs/number/commits'
|
| -
|
| -# Number of bytes to use for the prefix on our internal number structure.
|
| -# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
|
| -# need to reimplement cache data structures to be a bit more sophisticated than
|
| -# dicts. 1 seems to be just right.
|
| -PREFIX_LEN = 1
|
| -
|
| -# Set this to 'threads' to gather coverage data while testing.
|
| -POOL_KIND = 'procs'
|
| -
|
| -
|
| -def pathlify(hash_prefix):
|
| - """Converts a binary object hash prefix into a posix path, one folder per
|
| - byte.
|
| -
|
| - >>> pathlify('\xDE\xAD')
|
| - 'de/ad'
|
| - """
|
| - return '/'.join('%02x' % ord(b) for b in hash_prefix)
|
| -
|
| -
|
| -@git.memoize_one(threadsafe=False)
|
| -def get_number_tree(prefix_bytes):
|
| - """Returns a dictionary of the git-number registry specified by
|
| - |prefix_bytes|.
|
| -
|
| - This is in the form of {<full binary ref>: <gen num> ...}
|
| -
|
| - >>> get_number_tree('\x83\xb4')
|
| - {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
|
| - """
|
| - ref = '%s:%s' % (REF, pathlify(prefix_bytes))
|
| -
|
| - try:
|
| - raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
|
| - return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
|
| - for i in xrange(len(raw) / CHUNK_SIZE))
|
| - except subprocess2.CalledProcessError:
|
| - return {}
|
| -
|
| -
|
| -@git.memoize_one(threadsafe=False)
|
| -def get_num(commit_hash):
|
| - """Returns the generation number for a commit.
|
| -
|
| - Returns None if the generation number for this commit hasn't been calculated
|
| - yet (see load_generation_numbers()).
|
| - """
|
| - return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
|
| -
|
| -
|
| -def clear_caches(on_disk=False):
|
| - """Clears in-process caches for e.g. unit testing."""
|
| - get_number_tree.clear()
|
| - get_num.clear()
|
| - if on_disk:
|
| - git.run('update-ref', '-d', REF)
|
| -
|
| -
|
| -def intern_number_tree(tree):
|
| - """Transforms a number tree (in the form returned by |get_number_tree|) into
|
| - a git blob.
|
| -
|
| - Returns the git blob id as hex-encoded string.
|
| -
|
| - >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
|
| - >>> intern_number_tree(d)
|
| - 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
|
| - """
|
| - with tempfile.TemporaryFile() as f:
|
| - for k, v in sorted(tree.iteritems()):
|
| - f.write(struct.pack(CHUNK_FMT, k, v))
|
| - f.seek(0)
|
| - return git.intern_f(f)
|
| -
|
| -
|
| -def leaf_map_fn((pre, tree)):
|
| - """Converts a prefix and number tree into a git index line."""
|
| - return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
|
| -
|
| -
|
| -def finalize(targets):
|
| - """Saves all cache data to the git repository.
|
| -
|
| - After calculating the generation number for |targets|, call finalize() to
|
| - save all the work to the git repository.
|
| -
|
| - This in particular saves the trees referred to by DIRTY_TREES.
|
| - """
|
| - if not DIRTY_TREES:
|
| - return
|
| -
|
| - msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
|
| -
|
| - idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
|
| - env = os.environ.copy()
|
| - env['GIT_INDEX_FILE'] = idx
|
| -
|
| - progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
|
| - with git.ProgressPrinter(progress_message) as inc:
|
| - git.run('read-tree', REF, env=env)
|
| -
|
| - prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
|
| - updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
|
| - stdin=subprocess2.PIPE, env=env)
|
| -
|
| - with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
|
| - for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
|
| - updater.stdin.write(item)
|
| - inc()
|
| -
|
| - updater.stdin.close()
|
| - updater.wait()
|
| - assert updater.returncode == 0
|
| -
|
| - tree_id = git.run('write-tree', env=env)
|
| - commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
|
| - for t in targets:
|
| - commit_cmd.extend(['-p', binascii.hexlify(t)])
|
| - commit_cmd.append(tree_id)
|
| - commit_hash = git.run(*commit_cmd)
|
| - git.run('update-ref', REF, commit_hash)
|
| - DIRTY_TREES.clear()
|
| -
|
| -
|
| -def preload_tree(prefix):
|
| - """Returns the prefix and parsed tree object for the specified prefix."""
|
| - return prefix, get_number_tree(prefix)
|
| -
|
| -
|
| -def all_prefixes(depth=PREFIX_LEN):
|
| - for x in (chr(i) for i in xrange(255)):
|
| - # This isn't covered because PREFIX_LEN currently == 1
|
| - if depth > 1: # pragma: no cover
|
| - for r in all_prefixes(depth - 1):
|
| - yield x + r
|
| - else:
|
| - yield x
|
| -
|
| -
|
| -def load_generation_numbers(targets):
|
| - """Populates the caches of get_num and get_number_tree so they contain
|
| - the results for |targets|.
|
| -
|
| - Loads cached numbers from disk, and calculates missing numbers if one or
|
| - more of |targets| is newer than the cached calculations.
|
| -
|
| - Args:
|
| - targets - An iterable of binary-encoded full git commit hashes.
|
| - """
|
| - # In case they pass us a generator, listify targets.
|
| - targets = list(targets)
|
| -
|
| - if all(get_num(t) is not None for t in targets):
|
| - return
|
| -
|
| - if git.tree(REF) is None:
|
| - empty = git.mktree({})
|
| - commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
|
| - empty)
|
| - git.run('update-ref', REF, commit_hash)
|
| -
|
| - with git.ScopedPool(kind=POOL_KIND) as pool:
|
| - preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
|
| -
|
| - rev_list = []
|
| -
|
| - with git.ProgressPrinter('Loading commits: %(count)d') as inc:
|
| - # Curiously, buffering the list into memory seems to be the fastest
|
| - # approach in python (as opposed to iterating over the lines in the
|
| - # stdout as they're produced). GIL strikes again :/
|
| - cmd = [
|
| - 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
|
| - ] + map(binascii.hexlify, targets)
|
| - for line in git.run(*cmd).splitlines():
|
| - tokens = map(binascii.unhexlify, line.split())
|
| - rev_list.append((tokens[0], tokens[1:]))
|
| - inc()
|
| -
|
| - get_number_tree.update(preload_iter)
|
| -
|
| - with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
|
| - for commit_hash, pars in rev_list:
|
| - num = max(map(get_num, pars)) + 1 if pars else 0
|
| -
|
| - prefix = commit_hash[:PREFIX_LEN]
|
| - get_number_tree(prefix)[commit_hash] = num
|
| - DIRTY_TREES[prefix] += 1
|
| - get_num.set(commit_hash, num)
|
| -
|
| - inc()
|
| -
|
| -
|
| -def main(): # pragma: no cover
|
| - parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
|
| - parser.add_option('--no-cache', action='store_true',
|
| - help='Do not actually cache anything we calculate.')
|
| - parser.add_option('--reset', action='store_true',
|
| - help='Reset the generation number cache and quit.')
|
| - parser.add_option('-v', '--verbose', action='count', default=0,
|
| - help='Be verbose. Use more times for more verbosity.')
|
| - opts, args = parser.parse_args()
|
| -
|
| - levels = [logging.ERROR, logging.INFO, logging.DEBUG]
|
| - logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
|
| -
|
| - try:
|
| - if opts.reset:
|
| - clear_caches(on_disk=True)
|
| - return
|
| -
|
| - try:
|
| - targets = git.parse_commitrefs(*(args or ['HEAD']))
|
| - except git.BadCommitRefException as e:
|
| - parser.error(e)
|
| -
|
| - load_generation_numbers(targets)
|
| - if not opts.no_cache:
|
| - finalize(targets)
|
| -
|
| - print '\n'.join(map(str, map(get_num, targets)))
|
| - return 0
|
| - except KeyboardInterrupt:
|
| - return 1
|
| -
|
| -
|
| -if __name__ == '__main__': # pragma: no cover
|
| - sys.exit(main())
|
|
|