Index: git_number.py |
diff --git a/git_number.py b/git_number.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..6607d7a30dc8da8bb8951aa4ef3d5b46a5c79d9d |
--- /dev/null |
+++ b/git_number.py |
@@ -0,0 +1,212 @@ |
+#!/usr/bin/env python |
+# Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import collections |
+import os |
+import optparse |
+import struct |
+import subprocess |
+import tempfile |
+ |
+import git_common |
+from git_common import git_hash, run_git, git_intern_f, git_tree |
M-A Ruel
2013/10/21 17:56:44
Many of them are used once or twice. This doesn't
iannucci
2013/10/22 07:28:22
Donez'd
|
+from git_common import git_mktree, StatusPrinter, hexlify, unhexlify, pathlify |
+from git_common import parse_committish, ScopedPool, memoize_one |
+ |
+ |
+# struct layout of one record in a number blob: network byte order ('!'), a |
+# 20-byte string and an unsigned 32-bit integer. get_number_tree() unpacks |
+# each record as a (binary commit hash, generation number) pair. |
+CHUNK_FMT = '!20sL' |
+# Size in bytes of one packed record. |
+CHUNK_SIZE = struct.calcsize(CHUNK_FMT) |
+# Maps a hash prefix to how many numbers load() has added under it; finalize() |
+# only re-writes the blobs for the prefixes recorded here. |
+DIRTY_TREES = collections.defaultdict(int) |
+# Git ref whose tree holds one number blob per prefix. |
+REF = 'refs/number/commits' |
+ |
+# Number of bytes to use for the prefix on our internal number structure. |
+# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would |
+# need to reimplement cache data structures to be a bit more sophisticated than |
M-A Ruel
2013/10/21 17:56:44
sophisticated
iannucci
2013/10/22 07:28:22
oops. Done.
|
+# dicts). 1 seems to be just right. |
+PREFIX_LEN = 1 |
+ |
+ |
+@memoize_one |
+def get_number_tree(prefix_bytes): |
+  """Return a dictionary of the blob contents specified by |prefix_bytes|. |
+  This is in the form of {<full binary ref>: <gen num> ...} |
+ |
+  The blob is read with `git cat-file blob` from REF at the path |
+  pathlify(prefix_bytes). If git produces no output (for instance because |
+  the blob does not exist yet) an empty dictionary is returned. Anything git |
+  prints on stderr is discarded. |
+ |
+  >>> get_number_tree('\x83\xb4') |
+  {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} |
+  """ |
+  ret = {} |
+  ref = '%s:%s' % (REF, pathlify(prefix_bytes)) |
+ |
+  p = subprocess.Popen(['git', 'cat-file', 'blob', ref], |
+                       stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
+  # communicate() drains both pipes and waits for git to exit, so the child |
+  # never writes into a closed stderr pipe and is always reaped. |
+  raw = buffer(p.communicate()[0]) |
M-A Ruel
2013/10/21 17:56:44
Why not use .communicate()? You risk a pipe buffer
iannucci
2013/10/22 07:28:22
Yeah, this used to make more sense. Done.
|
+  # The blob is a flat sequence of CHUNK_SIZE-byte records; any trailing |
+  # partial record is ignored by the floor division. |
+  for i in xrange(len(raw) // CHUNK_SIZE): |
+    commit_hash, num = struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) |
+    ret[commit_hash] = num |
+ |
+  return ret |
+ |
+ |
+@memoize_one |
+def get_num(ref): |
M-A Ruel
2013/10/21 17:56:44
A ref in git parlance is specifically not a hash.
iannucci
2013/10/22 07:28:22
You're right. hash is a built-in though... maybe c
M-A Ruel
2013/10/24 13:23:03
commit_hash
iannucci
2013/10/25 00:52:41
Done.
|
+  """Look up the generation number stored for the binary commit hash |ref|. |
+ |
+  The hash's first PREFIX_LEN bytes select which number tree is consulted. |
+  Returns None if that tree has no entry for |ref|. |
+  """ |
+  numbers = get_number_tree(ref[:PREFIX_LEN]) |
+  return numbers.get(ref) |
+ |
+ |
+def intern_number_tree(tree): |
+  """Transform a number tree (in the form returned by |get_number_tree|) into a |
M-A Ruel
2013/10/21 17:56:44
Transforms
iannucci
2013/10/22 07:28:22
Done.
|
+  git blob. |
+ |
+  The entries are packed with CHUNK_FMT in sorted key order into a temporary |
+  file, which is then handed to git_intern_f. |
+ |
+  Returns the git blob id as hex-encoded string. |
+ |
+  >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} |
+  >>> intern_number_tree(d) |
+  'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' |
+  """ |
+  with tempfile.TemporaryFile() as blob_file: |
+    blob_file.writelines( |
+        struct.pack(CHUNK_FMT, commit_hash, number) |
+        for commit_hash, number in sorted(tree.iteritems())) |
+    # Rewind so git_intern_f reads the records from the beginning. |
+    blob_file.seek(0) |
+    return git_intern_f(blob_file) |
+ |
+ |
+def leaf_map_fn((pre, tree)): |
+  """Build the git index entry for one (prefix, number tree) pair. |
+ |
+  The tree is interned as a blob and the result is a NUL-terminated |
+  '<mode> blob <id>\\t<path>' record, the form finalize() pipes into |
+  `git update-index -z --index-info`. |
+  """ |
+  blob_id = intern_number_tree(tree) |
+  path = pathlify(pre) |
+  return '100644 blob %s\t%s\0' % (blob_id, path) |
+ |
+ |
+def finalize(targets): |
+ """After calculating the generation number for |targets|, call finalize to |
M-A Ruel
2013/10/21 17:56:44
The "After .." part should be in the second line d
iannucci
2013/10/22 07:28:22
Done.
|
+ save all our work to the git repository. |
+ |
+ Does nothing when DIRTY_TREES is empty. Otherwise the tree at REF is read |
+ into a private index file (number.idx inside the git dir, selected through |
+ GIT_INDEX_FILE), the number blob of every dirty prefix is re-interned and |
+ fed to `git update-index --index-info`, the index is written out as a tree, |
+ and that tree is committed and stored back into REF. |
+ |
+ Args: |
+ targets - The commit ids that were numbered; each one is passed through |
+ hexlify and recorded as a parent of the new REF commit. |
+ """ |
+ if not DIRTY_TREES: |
+ return |
+ |
+ msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) |
+ |
M-A Ruel
2013/10/21 17:56:44
one empty line max.
iannucci
2013/10/22 07:28:22
Done.
|
+ |
+ # Use a dedicated index file so the user's real index is left alone. |
+ idx = os.path.join(run_git('rev-parse', '--git-dir'), 'number.idx') |
+ env = os.environ.copy() |
+ env['GIT_INDEX_FILE'] = idx |
+ |
+ # '%%d' survives this formatting as a literal '%d'; presumably StatusPrinter |
+ # fills it in with the running count each time inc() is called. |
+ with StatusPrinter('Finalizing: (%%d/%d)' % len(DIRTY_TREES)) as inc: |
agable
2013/10/21 20:16:42
This half-formatted string is unfortunate. You cou
iannucci
2013/10/22 07:28:22
k. Done.
|
+ run_git('read-tree', REF, env=env) |
+ |
+ # Generator: the (prefix, tree) pairs are produced lazily as imap pulls them. |
+ prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) |
+ updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'], |
+ stdin=subprocess.PIPE, env=env) |
+ |
+ with ScopedPool() as leaf_pool: |
+ for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): |
+ updater.stdin.write(item) |
+ inc() |
+ |
+ # Closing stdin signals end of input so update-index can finish. |
+ # NOTE(review): the exit status of update-index is not checked. |
+ updater.stdin.close() |
+ updater.wait() |
+ |
+ # Parents of the new commit: the previous REF commit plus every target. |
+ commit_cmd = ['commit-tree', '-m', msg, '-p', git_hash(REF)] |
+ for t in targets: |
+ commit_cmd += ['-p', hexlify(t)] |
+ # The tree argument comes last; it is written from the private index. |
+ commit_cmd.append(run_git('write-tree', env=env)) |
agable
2013/10/21 20:16:42
This is confusing, especially since you're calling
iannucci
2013/10/22 07:28:22
Hm... not sure I agree, but sure. Done.
|
+ commit_id = run_git(*commit_cmd) |
+ run_git('update-ref', REF, commit_id) |
+ |
+ |
+def preload_tree(prefix): |
+  """Load the number tree for |prefix| and return it paired with the prefix. |
+ |
+  Returns a (prefix, tree) tuple so that results arriving out of order can |
+  still be matched to the prefix they belong to. |
+  """ |
+  tree = get_number_tree(prefix) |
+  return prefix, tree |
+ |
+ |
+def all_prefixes(depth=PREFIX_LEN): |
+  """Yield every possible binary prefix that is |depth| bytes long. |
+ |
+  Prefixes are produced in ascending byte order, 256 ** depth in total. |
+ |
+  Args: |
+    depth - Length of the prefixes in bytes; defaults to PREFIX_LEN. |
+  """ |
+  # A byte has 256 values (0x00 through 0xff); xrange's end is exclusive. |
+  for x in (chr(i) for i in xrange(256)): |
+    if depth > 1: |
+      for r in all_prefixes(depth - 1): |
+        yield x + r |
+    else: |
+      yield x |
+ |
+ |
+def load(targets): |
+ """Load/calculate the generation numbers for targets. |
+ |
+ Returns immediately when every target already has a number. Otherwise REF |
+ is created (as an empty initial commit) if it has no tree yet, the commits |
+ reachable from the targets but not from REF are listed with `git rev-list`, |
+ and each one is assigned max(parent numbers) + 1, or 0 when it has no |
+ parents. New numbers are stored in the in-memory number trees and counted |
+ in DIRTY_TREES; nothing is written to git here. |
+ |
+ Args: |
+ targets - An iterable of binary-encoded full git commit id hashes. |
+ It is iterated more than once, so it must not be a one-shot iterator. |
+ """ |
+ if all(get_num(t) is not None for t in targets): |
+ return |
+ |
+ if git_tree(REF) is None: |
+ empty = git_mktree({}) |
+ ref = run_git('commit-tree', '-m', 'Initial commit from git-number', empty) |
+ run_git('update-ref', REF, ref) |
+ |
+ with ScopedPool() as pool: |
+ # Start loading the number tree of every prefix through the pool; the |
+ # results are collected further down, after the rev-list output is parsed. |
+ preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) |
+ |
+ rev_list = [] |
+ |
+ with StatusPrinter('Loading commits: %d') as inc: |
+ # Curiously, buffering the list into memory seems to be the fastest |
+ # approach in python (as opposed to iterating over the lines in the |
+ # stdout as they're produced). GIL strikes again :/ |
+ # '^' + REF excludes everything reachable from REF. --parents appends the |
+ # parent ids to each line, and --reverse of --topo-order lists parents |
+ # before their children. |
+ cmd = [ |
+ 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF |
+ ] + map(hexlify, targets) |
+ for line in run_git(*cmd).splitlines(): |
+ toks = map(unhexlify, line.split()) |
agable
2013/10/21 20:16:42
tokens
iannucci
2013/10/22 07:28:22
Done.
|
+ # First token is the commit itself, the remaining ones are its parents. |
+ rev_list.append((toks[0], toks[1:])) |
+ inc() |
+ |
+ # Seed the memoization cache of get_number_tree with the preloaded trees; |
+ # this relies on memoize_one exposing its cache as a dict-like `.cache`. |
+ for prefix, tree in preload_iter: |
+ get_number_tree.cache[prefix] = tree |
+ |
+ with StatusPrinter('Counting: %%d/%d' % len(rev_list)) as inc: |
+ for ref, pars in rev_list: |
+ # Parses as (max(...) + 1) if pars else 0. |
+ num = max(map(get_num, pars)) + 1 if pars else 0 |
+ |
+ prefix = ref[:PREFIX_LEN] |
+ # Mutates the cached tree in place; presumably get_number_tree returns |
+ # the same dict object on every call for a given prefix. |
+ get_number_tree(prefix)[ref] = num |
+ DIRTY_TREES[prefix] += 1 |
+ # Overwrite any result get_num memoized for this hash earlier (the |
+ # check at the top of this function may have cached None for it). |
+ get_num.cache[ref] = num |
+ |
+ inc() |
+ |
+ |
+def main(): |
+ """Command line entry point. |
+ |
+ Parses the options and the committish arguments (HEAD when none are |
+ given). With --reset the ref REF is deleted. Otherwise the generation |
+ number of every target is loaded or calculated and printed, one per line, |
+ and the results are saved with finalize() unless --no-cache was given. |
+ """ |
+ parser = optparse.OptionParser( |
+ usage='usage: %prog [options] [<committish>]\n\n' |
+ '<committish> defaults to HEAD') |
+ parser.add_option('--no-cache', action='store_true', |
+ help='Do not actually cache anything we calculate.') |
+ parser.add_option('--reset', action='store_true', |
+ help='Reset the generation number cache and quit.') |
+ # No default is given, so opts.verbose is None when -v is not passed. |
+ parser.add_option('-v', '--verbose', action='count', |
agable
2013/10/21 20:16:42
default=0
iannucci
2013/10/22 07:28:22
Done.
|
+ help='Be verbose. Use more times for more verbosity.') |
+ opts, args = parser.parse_args() |
+ |
+ if not args: |
+ args = ['HEAD'] |
+ |
+ # Only override git_common's verbosity when -v was given at least once. |
+ if opts.verbose: |
M-A Ruel
2013/10/21 17:56:44
Why do it conditionally?
iannucci
2013/10/22 07:28:22
Done.
|
+ git_common.VERBOSE_LEVEL = opts.verbose |
+ |
+ if opts.reset: |
+ # Deleting the ref discards the stored numbers. |
+ run_git('update-ref', '-d', REF) |
M-A Ruel
2013/10/21 17:56:44
I'd prefer to return 0 right after instead of usin
iannucci
2013/10/22 07:28:22
Done.
|
+ else: |
+ # Presumably parse_committish resolves the arguments to the binary commit |
+ # hashes that load() expects; confirm against git_common. |
+ targets = parse_committish(*args) |
+ load(targets) |
+ for t in targets: |
+ print get_num(t) |
+ if not opts.no_cache: |
+ finalize(targets) |
M-A Ruel
2013/10/21 17:56:44
return 0 after.
iannucci
2013/10/22 07:28:22
Done.
|
+ |
+ |
+# Run main() only when this file is executed as a script. |
+if __name__ == '__main__': |
+ try: |
+ main() |
M-A Ruel
2013/10/21 17:56:44
sys.exit(main())
iannucci
2013/10/22 07:28:22
Done.
|
+ # Ctrl-C ends the program quietly, without a traceback. |
+ except KeyboardInterrupt: |
M-A Ruel
2013/10/21 17:56:44
I'd prefer this to be handled inside the main().
iannucci
2013/10/22 07:28:22
Done.
|
+ pass |