 Chromium Code Reviews
 Chromium Code Reviews Issue 26109002:
  Add git-number script to calculate generation numbers for commits.  (Closed) 
  Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
    
  
    Issue 26109002:
  Add git-number script to calculate generation numbers for commits.  (Closed) 
  Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools| Index: git_number.py | 
| diff --git a/git_number.py b/git_number.py | 
| new file mode 100755 | 
| index 0000000000000000000000000000000000000000..6607d7a30dc8da8bb8951aa4ef3d5b46a5c79d9d | 
| --- /dev/null | 
| +++ b/git_number.py | 
| @@ -0,0 +1,212 @@ | 
| +#!/usr/bin/env python | 
| +# Copyright (c) 2013 The Chromium Authors. All rights reserved. | 
| +# Use of this source code is governed by a BSD-style license that can be | 
| +# found in the LICENSE file. | 
| + | 
| +import collections | 
| +import os | 
| +import optparse | 
| +import struct | 
| +import subprocess | 
| +import tempfile | 
| + | 
| +import git_common | 
| +from git_common import git_hash, run_git, git_intern_f, git_tree | 
| 
M-A Ruel
2013/10/21 17:56:44
Many of them are used once or twice. This doesn't
 
iannucci
2013/10/22 07:28:22
Donez'd
 | 
| +from git_common import git_mktree, StatusPrinter, hexlify, unhexlify, pathlify | 
| +from git_common import parse_committish, ScopedPool, memoize_one | 
| + | 
| + | 
| +# struct layout of one stored record: a 20-byte string followed by an unsigned | 
| +# long, in network byte order ('!'). | 
| +CHUNK_FMT = '!20sL' | 
| +CHUNK_SIZE = struct.calcsize(CHUNK_FMT) | 
| +# Maps a prefix to the count of numbers added under that prefix in this run. | 
| +DIRTY_TREES = collections.defaultdict(int) | 
| +# Git ref whose tree holds the serialized number blobs. | 
| +REF = 'refs/number/commits' | 
| + | 
| +# Number of bytes to use for the prefix on our internal number structure. | 
| +# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would | 
| +# need to reimplement cache data structures to be a bit more sophisticated than | 
| 
M-A Ruel
2013/10/21 17:56:44
sophisticated
 
iannucci
2013/10/22 07:28:22
oops. Done.
 | 
| +# dicts. 1 seems to be just right. | 
| +PREFIX_LEN = 1 | 
| + | 
| + | 
| +@memoize_one | 
| +def get_number_tree(prefix_bytes): | 
| + """Return a dictionary of the blob contents specified by |prefix_bytes|. | 
| + This is in the form of {<full binary ref>: <gen num> ...} | 
| + | 
| + >>> get_number_tree('\x83\xb4') | 
| + {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} | 
| + """ | 
| + ret = {} | 
| + # <REF>:<path> names the blob stored at that path in the tree REF points at. | 
| + ref = '%s:%s' % (REF, pathlify(prefix_bytes)) | 
| + | 
| + # NOTE(review): the exit status is never checked and the process is never | 
| + # wait()ed on. If git prints nothing (presumably the case when the blob does | 
| + # not exist yet), the loop below does not run and an empty dict is returned. | 
| + p = subprocess.Popen(['git', 'cat-file', 'blob', ref], | 
| + stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 
| + p.stderr.close() | 
| + raw = buffer(p.stdout.read()) | 
| 
M-A Ruel
2013/10/21 17:56:44
Why not use .communicate()? You risk a pipe buffer
 
iannucci
2013/10/22 07:28:22
Yeah, this used to make more sense. Done.
 | 
| + # Each CHUNK_SIZE-byte record unpacks to a (hash, number) pair. Python 2 | 
| + # integer division means a trailing partial record is ignored. | 
| + for i in xrange(len(raw) / CHUNK_SIZE): | 
| + ref, num = struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) | 
| + ret[ref] = num | 
| + | 
| + return ret | 
| + | 
| + | 
| +@memoize_one | 
| +def get_num(ref): | 
| 
M-A Ruel
2013/10/21 17:56:44
A ref in git parlance is specifically not a hash.
 
iannucci
2013/10/22 07:28:22
You're right. hash is a built-in though... maybe c
 
M-A Ruel
2013/10/24 13:23:03
commit_hash
 
iannucci
2013/10/25 00:52:41
Done.
 | 
| + """Takes a hash and returns the generation number for it or None if the | 
| + ref is unknown.""" | 
| + # The first PREFIX_LEN bytes of the hash select which number tree to search. | 
| + return get_number_tree(ref[:PREFIX_LEN]).get(ref) | 
| + | 
| + | 
| +def intern_number_tree(tree): | 
| + """Transform a number tree (in the form returned by |get_number_tree|) into a | 
| 
M-A Ruel
2013/10/21 17:56:44
Transforms
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + git blob. | 
| + | 
| + Returns the git blob id as hex-encoded string. | 
| + | 
| + >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} | 
| + >>> intern_number_tree(d) | 
| + 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' | 
| + """ | 
| + with tempfile.TemporaryFile() as f: | 
| + # Records are written in sorted key order, so the serialized bytes do not | 
| + # depend on dict iteration order. | 
| + for k, v in sorted(tree.iteritems()): | 
| + f.write(struct.pack(CHUNK_FMT, k, v)) | 
| + # Rewind so the file is read from the start by git_intern_f. | 
| + f.seek(0) | 
| + return git_intern_f(f) | 
| + | 
| + | 
| +def leaf_map_fn((pre, tree)): | 
| + """Converts a prefix and number tree into a git index line.""" | 
| + return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) | 
| + | 
| + | 
| +def finalize(targets): | 
| + """After calculating the generation number for |targets|, call finalize to | 
| 
M-A Ruel
2013/10/21 17:56:44
The "After .." part should be in the second line d
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + save all our work to the git repository. | 
| + """ | 
| + # Nothing was added in this run, so there is nothing to commit. | 
| + if not DIRTY_TREES: | 
| + return | 
| + | 
| + msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) | 
| + | 
| 
M-A Ruel
2013/10/21 17:56:44
one empty line max.
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + | 
| + # The git commands below that receive this env use number.idx inside the | 
| + # git dir as their index file (via GIT_INDEX_FILE). | 
| + idx = os.path.join(run_git('rev-parse', '--git-dir'), 'number.idx') | 
| + env = os.environ.copy() | 
| + env['GIT_INDEX_FILE'] = idx | 
| + | 
| + with StatusPrinter('Finalizing: (%%d/%d)' % len(DIRTY_TREES)) as inc: | 
| 
agable
2013/10/21 20:16:42
This half-formatted string is unfortunate. You cou
 
iannucci
2013/10/22 07:28:22
k. Done.
 | 
| + # Start from the tree currently stored at REF. | 
| + run_git('read-tree', REF, env=env) | 
| + | 
| + # Lazily pairs each dirty prefix with its in-memory number tree. | 
| + prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) | 
| + updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'], | 
| + stdin=subprocess.PIPE, env=env) | 
| + | 
| + # Each dirty tree is interned by the pool, and the resulting index line is | 
| + # fed to update-index as it becomes available. | 
| + with ScopedPool() as leaf_pool: | 
| + for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): | 
| + updater.stdin.write(item) | 
| + inc() | 
| + | 
| + updater.stdin.close() | 
| + # NOTE(review): the exit status returned by wait() is not checked. | 
| + updater.wait() | 
| + | 
| + # The new commit's parents are the previous REF commit plus every target. | 
| + commit_cmd = ['commit-tree', '-m', msg, '-p', git_hash(REF)] | 
| + for t in targets: | 
| + commit_cmd += ['-p', hexlify(t)] | 
| + # The tree to commit is written out from the private index. | 
| + commit_cmd.append(run_git('write-tree', env=env)) | 
| 
agable
2013/10/21 20:16:42
This is confusing, especially since you're calling
 
iannucci
2013/10/22 07:28:22
Hm... not sure I agree, but sure. Done.
 | 
| + commit_id = run_git(*commit_cmd) | 
| + run_git('update-ref', REF, commit_id) | 
| + | 
| + | 
| +def preload_tree(prefix): | 
| + """Returns the prefix and parsed tree object for the specified prefix.""" | 
| + return prefix, get_number_tree(prefix) | 
| + | 
| + | 
| +def all_prefixes(depth=PREFIX_LEN): | 
| + for x in (chr(i) for i in xrange(255)): | 
| + if depth > 1: | 
| + for r in all_prefixes(depth-1): | 
| + yield x+r | 
| + else: | 
| + yield x | 
| + | 
| + | 
| +def load(targets): | 
| + """Load/calculate the generation numbers for targets. | 
| + | 
| + Args: | 
| + targets - An iterable of binary-encoded full git commit id hashes. | 
| + """ | 
| + # Every target already has a number, so there is nothing to calculate. | 
| + if all(get_num(t) is not None for t in targets): | 
| + return | 
| + | 
| + # First use: point REF at an initial commit of an empty tree. | 
| + if git_tree(REF) is None: | 
| + empty = git_mktree({}) | 
| + ref = run_git('commit-tree', '-m', 'Initial commit from git-number', empty) | 
| + run_git('update-ref', REF, ref) | 
| + | 
| + # NOTE(review): indentation is lost in this view; presumably the rest of the | 
| + # function runs inside this pool so preloading overlaps with rev-list. | 
| + with ScopedPool() as pool: | 
| + preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) | 
| + | 
| + rev_list = [] | 
| + | 
| + with StatusPrinter('Loading commits: %d') as inc: | 
| + # Curiously, buffering the list into memory seems to be the fastest | 
| + # approach in python (as opposed to iterating over the lines in the | 
| + # stdout as they're produced). GIL strikes again :/ | 
| + # '^' + REF excludes commits already reachable from REF. With --reverse, | 
| + # parents are listed before their children. | 
| + cmd = [ | 
| + 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF | 
| + ] + map(hexlify, targets) | 
| + for line in run_git(*cmd).splitlines(): | 
| + # With --parents each line is: <commit> <parent> <parent> ... | 
| + toks = map(unhexlify, line.split()) | 
| 
agable
2013/10/21 20:16:42
tokens
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + rev_list.append((toks[0], toks[1:])) | 
| + inc() | 
| + | 
| + # Seed the memoization cache with the trees loaded by the pool. | 
| + for prefix, tree in preload_iter: | 
| + get_number_tree.cache[prefix] = tree | 
| + | 
| + with StatusPrinter('Counting: %%d/%d' % len(rev_list)) as inc: | 
| + for ref, pars in rev_list: | 
| + # One more than the largest parent number; a commit with no parents is 0. | 
| + num = max(map(get_num, pars)) + 1 if pars else 0 | 
| + | 
| + prefix = ref[:PREFIX_LEN] | 
| + get_number_tree(prefix)[ref] = num | 
| + DIRTY_TREES[prefix] += 1 | 
| + # Overwrite any memoized result so later get_num(ref) calls see num. | 
| + get_num.cache[ref] = num | 
| + | 
| + inc() | 
| + | 
| + | 
| +def main(): | 
| + """Parses options, then resets the cache or prints generation numbers.""" | 
| + parser = optparse.OptionParser( | 
| + usage='usage: %prog [options] [<committish>]\n\n' | 
| + '<committish> defaults to HEAD') | 
| + parser.add_option('--no-cache', action='store_true', | 
| + help='Do not actually cache anything we calculate.') | 
| + parser.add_option('--reset', action='store_true', | 
| + help='Reset the generation number cache and quit.') | 
| + parser.add_option('-v', '--verbose', action='count', | 
| 
agable
2013/10/21 20:16:42
default=0
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + help='Be verbose. Use more times for more verbosity.') | 
| + opts, args = parser.parse_args() | 
| + | 
| + if not args: | 
| + args = ['HEAD'] | 
| + | 
| + # Without -v, git_common.VERBOSE_LEVEL keeps whatever value it already has. | 
| + if opts.verbose: | 
| 
M-A Ruel
2013/10/21 17:56:44
Why do it conditionally?
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + git_common.VERBOSE_LEVEL = opts.verbose | 
| + | 
| + if opts.reset: | 
| + # Deleting REF discards the stored numbers. | 
| + run_git('update-ref', '-d', REF) | 
| 
M-A Ruel
2013/10/21 17:56:44
I'd prefer to return 0 right after instead of usin
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + else: | 
| + targets = parse_committish(*args) | 
| + load(targets) | 
| + # One generation number is printed per target, in argument order. | 
| + for t in targets: | 
| + print get_num(t) | 
| + if not opts.no_cache: | 
| + finalize(targets) | 
| 
M-A Ruel
2013/10/21 17:56:44
return 0 after.
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + | 
| + | 
| +if __name__ == '__main__': | 
| + # KeyboardInterrupt (Ctrl-C) is swallowed, so no traceback is printed for it. | 
| + try: | 
| + main() | 
| 
M-A Ruel
2013/10/21 17:56:44
sys.exit(main())
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + except KeyboardInterrupt: | 
| 
M-A Ruel
2013/10/21 17:56:44
I'd prefer this to be handled inside the main().
 
iannucci
2013/10/22 07:28:22
Done.
 | 
| + pass |