OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Usage: %prog [options] [<commitref>]* | |
7 | |
8 If no <commitref>'s are supplied, it defaults to HEAD. | |
9 | |
10 Calculates the generation number for one or more commits in a git repo. | |
11 | |
12 Generation number of a commit C with parents P is defined as: | |
13 generation_number(C, []) = 0 | |
14 generation_number(C, P) = max(map(generation_number, P)) + 1 | |
15 | |
16 This number can be used to order commits relative to each other, as long as for | |
17 any pair of the commits, one is an ancestor of the other. | |
18 | |
19 Since calculating the generation number of a commit requires walking that | |
20 commit's entire history, this script caches all calculated data inside the git | |
21 repo that it operates on in the ref 'refs/number/commits'. | |
22 """ | |
23 | |
24 import binascii | |
25 import collections | |
26 import logging | |
27 import optparse | |
28 import os | |
29 import struct | |
30 import subprocess | |
31 import sys | |
32 import tempfile | |
33 | |
34 import git_common as git | |
35 | |
36 CHUNK_FMT = '!20sL' | |
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT) | |
38 DIRTY_TREES = collections.defaultdict(int) | |
39 REF = 'refs/number/commits' | |
40 | |
41 # Number of bytes to use for the prefix on our internal number structure. | |
42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would | |
43 # need to reimplement cache data structures to be a bit more sophisticated than | |
44 # dicts. 1 seems to be just right. | |
45 PREFIX_LEN = 1 | |
46 | |
47 # Set this to 'threads' to gather coverage data while testing. | |
48 POOL_KIND = 'procs' | |
49 | |
50 | |
51 def pathlify(hash_prefix): | |
52 """Converts a binary object hash prefix into a posix path, one folder per | |
53 byte. | |
54 | |
55 >>> pathlify('\xDE\xAD') | |
56 'de/ad' | |
57 """ | |
58 return '/'.join('%02x' % ord(b) for b in hash_prefix) | |
59 | |
60 | |
61 @git.memoize_one(threadsafe=False) | |
62 def get_number_tree(prefix_bytes): | |
63 """Returns a dictionary of the git-number registry specified by | |
64 |prefix_bytes|. | |
65 | |
66 This is in the form of {<full binary ref>: <gen num> ...} | |
67 | |
68 >>> get_number_tree('\x83\xb4') | |
69 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} | |
70 """ | |
71 ref = '%s:%s' % (REF, pathlify(prefix_bytes)) | |
72 | |
73 try: | |
74 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False)) | |
75 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) | |
76 for i in xrange(len(raw) / CHUNK_SIZE)) | |
77 except git.CalledProcessError: | |
78 return {} | |
79 | |
80 | |
81 @git.memoize_one(threadsafe=False) | |
82 def get_num(commit_hash): | |
83 """Takes a commit hash and returns the generation number for it or None if | |
84 the commit hash doesn't have a computed value yet.""" | |
85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash) | |
86 | |
87 | |
88 def clear_caches(): | |
89 """Clears in-process caches for e.g. unit testing.""" | |
90 get_number_tree.clear() | |
91 get_num.clear() | |
92 | |
93 | |
94 def intern_number_tree(tree): | |
95 """Transforms a number tree (in the form returned by |get_number_tree|) into | |
96 a git blob. | |
97 | |
98 Returns the git blob id as hex-encoded string. | |
99 | |
100 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} | |
101 >>> intern_number_tree(d) | |
102 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' | |
103 """ | |
104 with tempfile.TemporaryFile() as f: | |
105 for k, v in sorted(tree.iteritems()): | |
106 f.write(struct.pack(CHUNK_FMT, k, v)) | |
107 f.seek(0) | |
108 return git.intern_f(f) | |
109 | |
110 | |
111 def leaf_map_fn((pre, tree)): | |
112 """Converts a prefix and number tree into a git index line.""" | |
113 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) | |
114 | |
115 | |
116 def finalize(targets): | |
117 """Saves all cache data to the git repository. | |
118 | |
119 After calculating the generation number for |targets|, call finalize() to | |
120 save all the work to the git repository. | |
121 | |
122 This in particular saves the trees referred to by DIRTY_TREES. | |
123 """ | |
124 if not DIRTY_TREES: | |
125 return | |
126 | |
127 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) | |
128 | |
129 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx') | |
130 env = os.environ.copy() | |
131 env['GIT_INDEX_FILE'] = idx | |
132 | |
133 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES) | |
134 with git.ProgressPrinter(progress_message) as inc: | |
135 git.run('read-tree', REF, env=env) | |
136 | |
137 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) | |
138 updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'], | |
139 stdin=subprocess.PIPE, env=env) | |
140 | |
141 with git.ScopedPool(kind=POOL_KIND) as leaf_pool: | |
142 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): | |
143 updater.stdin.write(item) | |
144 inc() | |
145 | |
146 updater.stdin.close() | |
147 updater.wait() | |
148 | |
M-A Ruel
2013/11/13 19:55:00
Don't you care about updater.returncode?
iannucci
2013/11/14 07:06:29
added assert
| |
149 tree_id = git.run('write-tree', env=env) | |
150 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF) | |
151 for t in targets: | |
152 commit_cmd += ['-p', binascii.hexlify(t)] | |
153 commit_cmd.append(tree_id) | |
154 commit_hash = git.run(*commit_cmd) | |
155 git.run('update-ref', REF, commit_hash) | |
156 DIRTY_TREES.clear() | |
157 | |
158 | |
159 def preload_tree(prefix): | |
160 """Returns the prefix and parsed tree object for the specified prefix.""" | |
161 return prefix, get_number_tree(prefix) | |
162 | |
163 | |
164 def all_prefixes(depth=PREFIX_LEN): | |
165 for x in (chr(i) for i in xrange(255)): | |
166 # This isn't covered because PREFIX_LEN currently == 1 | |
167 if depth > 1: # pragma: no cover | |
168 for r in all_prefixes(depth-1): | |
169 yield x+r | |
M-A Ruel
2013/11/13 19:55:00
x+ r
iannucci
2013/11/14 07:06:29
Done.
| |
170 else: | |
171 yield x | |
172 | |
173 | |
174 def load(targets): | |
175 """Load the generation numbers for targets. Calculates missing numbers if | |
M-A Ruel
2013/11/13 19:55:00
"""Loads the generation numbers for targets.
Calc
iannucci
2013/11/14 07:06:29
PTAL
| |
176 one or more of the targets is past the cached calculations. | |
177 | |
178 Args: | |
179 targets - An iterable of binary-encoded full git commit hashes. | |
180 """ | |
181 if all(get_num(t) is not None for t in targets): | |
182 return | |
183 | |
184 if git.tree(REF) is None: | |
185 empty = git.mktree({}) | |
186 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number', | |
187 empty) | |
188 git.run('update-ref', REF, commit_hash) | |
189 | |
190 with git.ScopedPool(kind=POOL_KIND) as pool: | |
191 preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) | |
192 | |
193 rev_list = [] | |
194 | |
195 with git.ProgressPrinter('Loading commits: %(count)d') as inc: | |
196 # Curiously, buffering the list into memory seems to be the fastest | |
197 # approach in python (as opposed to iterating over the lines in the | |
198 # stdout as they're produced). GIL strikes again :/ | |
199 cmd = [ | |
200 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF | |
201 ] + map(binascii.hexlify, targets) | |
202 for line in git.run(*cmd).splitlines(): | |
203 tokens = map(binascii.unhexlify, line.split()) | |
204 rev_list.append((tokens[0], tokens[1:])) | |
205 inc() | |
206 | |
207 get_number_tree.update(preload_iter) | |
208 | |
209 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc: | |
210 for commit_hash, pars in rev_list: | |
211 num = max(map(get_num, pars)) + 1 if pars else 0 | |
212 | |
213 prefix = commit_hash[:PREFIX_LEN] | |
214 get_number_tree(prefix)[commit_hash] = num | |
215 DIRTY_TREES[prefix] += 1 | |
216 get_num.set(commit_hash, num) | |
217 | |
218 inc() | |
219 | |
220 | |
221 def git_number(do_reset, do_cache, target_refs): | |
M-A Ruel
2013/11/13 19:55:00
I'd prefer a oneliner docstring
iannucci
2013/11/14 07:06:29
I nuked the function because it was too multipurpo
| |
222 if do_reset: | |
223 git.run('update-ref', '-d', REF) | |
224 return [] | |
225 | |
226 targets = git.parse_commitrefs(*target_refs) | |
227 load(targets) | |
228 ret = map(get_num, targets) | |
229 if do_cache: | |
230 finalize(targets) | |
231 | |
232 return ret | |
233 | |
234 | |
235 def main(): # pragma: no cover | |
M-A Ruel
2013/11/13 19:55:00
You could mock git_number and test it! :P
I'm kid
iannucci
2013/11/14 07:06:29
:D
| |
236 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__) | |
237 parser.add_option('--no-cache', action='store_true', | |
238 help='Do not actually cache anything we calculate.') | |
239 parser.add_option('--reset', action='store_true', | |
240 help='Reset the generation number cache and quit.') | |
241 parser.add_option('-v', '--verbose', action='count', default=0, | |
242 help='Be verbose. Use more times for more verbosity.') | |
243 opts, args = parser.parse_args() | |
244 | |
245 if opts.verbose == 1: | |
246 logging.getLogger().setLevel(logging.INFO) | |
247 elif opts.verbose >= 2: | |
248 logging.getLogger().setLevel(logging.DEBUG) | |
249 | |
250 try: | |
251 ret = git_number(opts.reset, not opts.no_cache, args or ['HEAD']) | |
252 print '\n'.join(map(str, ret)) | |
253 return 0 | |
254 except KeyboardInterrupt: | |
255 pass | |
M-A Ruel
2013/11/13 19:55:00
return 1
iannucci
2013/11/14 07:06:29
Done.
| |
256 | |
257 | |
258 if __name__ == '__main__': # pragma: no cover | |
259 sys.exit(main()) | |
OLD | NEW |