Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: ws Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Usage: %prog [options] [<commitref>]*
7
8 If no <commitref>'s are supplied, it defaults to HEAD.
9
10 Calculates the generation number for one or more commits in a git repo.
11
12 Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16 This number can be used to order commits relative to each other, as long as for
17 any pair of the commits, one is an ancestor of the other.
18
19 Since calculating the generation number of a commit requires walking that
20 commit's entire history, this script caches all calculated data inside the git
21 repo that it operates on in the ref 'refs/number/commits'.
22 """
23
24 import binascii
25 import collections
26 import logging
27 import optparse
28 import os
29 import struct
30 import subprocess
31 import sys
32 import tempfile
33
34 import git_common as git
35
36 CHUNK_FMT = '!20sL'
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38 DIRTY_TREES = collections.defaultdict(int)
39 REF = 'refs/number/commits'
40
41 # Number of bytes to use for the prefix on our internal number structure.
42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
43 # need to reimplement cache data structures to be a bit more sophisticated than
44 # dicts. 1 seems to be just right.
45 PREFIX_LEN = 1
46
47 # Set this to 'threads' to gather coverage data while testing.
48 POOL_KIND = 'procs'
49
50
51 def pathlify(hash_prefix):
52 """Converts a binary object hash prefix into a posix path, one folder per
53 byte.
54
55 >>> pathlify('\xDE\xAD')
56 'de/ad'
57 """
58 return '/'.join('%02x' % ord(b) for b in hash_prefix)
59
60
61 @git.memoize_one(threadsafe=False)
62 def get_number_tree(prefix_bytes):
63 """Returns a dictionary of the git-number registry specified by
64 |prefix_bytes|.
65
66 This is in the form of {<full binary ref>: <gen num> ...}
67
68 >>> get_number_tree('\x83\xb4')
69 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
70 """
71 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
72
73 try:
74 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
75 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
76 for i in xrange(len(raw) / CHUNK_SIZE))
77 except git.CalledProcessError:
78 return {}
79
80
81 @git.memoize_one(threadsafe=False)
82 def get_num(commit_hash):
83 """Takes a commit hash and returns the generation number for it or None if
84 the commit hash doesn't have a computed value yet."""
85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
86
87
88 def clear_caches():
89 """Clears in-process caches for e.g. unit testing."""
90 get_number_tree.clear()
91 get_num.clear()
92
93
94 def intern_number_tree(tree):
95 """Transforms a number tree (in the form returned by |get_number_tree|) into
96 a git blob.
97
98 Returns the git blob id as hex-encoded string.
99
100 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
101 >>> intern_number_tree(d)
102 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
103 """
104 with tempfile.TemporaryFile() as f:
105 for k, v in sorted(tree.iteritems()):
106 f.write(struct.pack(CHUNK_FMT, k, v))
107 f.seek(0)
108 return git.intern_f(f)
109
110
111 def leaf_map_fn((pre, tree)):
112 """Converts a prefix and number tree into a git index line."""
113 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
114
115
116 def finalize(targets):
117 """Saves all cache data to the git repository.
118
119 After calculating the generation number for |targets|, call finalize() to
120 save all the work to the git repository.
121
122 This in particular saves the trees referred to by DIRTY_TREES.
123 """
124 if not DIRTY_TREES:
125 return
126
127 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
128
129 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
130 env = os.environ.copy()
131 env['GIT_INDEX_FILE'] = idx
132
133 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
134 with git.ProgressPrinter(progress_message) as inc:
135 git.run('read-tree', REF, env=env)
136
137 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
138 updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'],
139 stdin=subprocess.PIPE, env=env)
140
141 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
142 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
143 updater.stdin.write(item)
144 inc()
145
146 updater.stdin.close()
147 updater.wait()
148
M-A Ruel 2013/11/13 19:55:00 Don't you care about updater.returncode?
iannucci 2013/11/14 07:06:29 added assert
149 tree_id = git.run('write-tree', env=env)
150 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
151 for t in targets:
152 commit_cmd += ['-p', binascii.hexlify(t)]
153 commit_cmd.append(tree_id)
154 commit_hash = git.run(*commit_cmd)
155 git.run('update-ref', REF, commit_hash)
156 DIRTY_TREES.clear()
157
158
159 def preload_tree(prefix):
160 """Returns the prefix and parsed tree object for the specified prefix."""
161 return prefix, get_number_tree(prefix)
162
163
164 def all_prefixes(depth=PREFIX_LEN):
165 for x in (chr(i) for i in xrange(255)):
166 # This isn't covered because PREFIX_LEN currently == 1
167 if depth > 1: # pragma: no cover
168 for r in all_prefixes(depth-1):
169 yield x+r
M-A Ruel 2013/11/13 19:55:00 x+ r
iannucci 2013/11/14 07:06:29 Done.
170 else:
171 yield x
172
173
174 def load(targets):
175 """Load the generation numbers for targets. Calculates missing numbers if
M-A Ruel 2013/11/13 19:55:00 """Loads the generation numbers for targets. Calc
iannucci 2013/11/14 07:06:29 PTAL
176 one or more of the targets is past the cached calculations.
177
178 Args:
179 targets - An iterable of binary-encoded full git commit hashes.
180 """
181 if all(get_num(t) is not None for t in targets):
182 return
183
184 if git.tree(REF) is None:
185 empty = git.mktree({})
186 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
187 empty)
188 git.run('update-ref', REF, commit_hash)
189
190 with git.ScopedPool(kind=POOL_KIND) as pool:
191 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
192
193 rev_list = []
194
195 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
196 # Curiously, buffering the list into memory seems to be the fastest
197 # approach in python (as opposed to iterating over the lines in the
198 # stdout as they're produced). GIL strikes again :/
199 cmd = [
200 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
201 ] + map(binascii.hexlify, targets)
202 for line in git.run(*cmd).splitlines():
203 tokens = map(binascii.unhexlify, line.split())
204 rev_list.append((tokens[0], tokens[1:]))
205 inc()
206
207 get_number_tree.update(preload_iter)
208
209 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
210 for commit_hash, pars in rev_list:
211 num = max(map(get_num, pars)) + 1 if pars else 0
212
213 prefix = commit_hash[:PREFIX_LEN]
214 get_number_tree(prefix)[commit_hash] = num
215 DIRTY_TREES[prefix] += 1
216 get_num.set(commit_hash, num)
217
218 inc()
219
220
221 def git_number(do_reset, do_cache, target_refs):
M-A Ruel 2013/11/13 19:55:00 I'd prefer a oneliner docstring
iannucci 2013/11/14 07:06:29 I nuked the function because it was too multipurpo
222 if do_reset:
223 git.run('update-ref', '-d', REF)
224 return []
225
226 targets = git.parse_commitrefs(*target_refs)
227 load(targets)
228 ret = map(get_num, targets)
229 if do_cache:
230 finalize(targets)
231
232 return ret
233
234
235 def main(): # pragma: no cover
M-A Ruel 2013/11/13 19:55:00 You could mock git_number and test it! :P I'm kid
iannucci 2013/11/14 07:06:29 :D
236 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
237 parser.add_option('--no-cache', action='store_true',
238 help='Do not actually cache anything we calculate.')
239 parser.add_option('--reset', action='store_true',
240 help='Reset the generation number cache and quit.')
241 parser.add_option('-v', '--verbose', action='count', default=0,
242 help='Be verbose. Use more times for more verbosity.')
243 opts, args = parser.parse_args()
244
245 if opts.verbose == 1:
246 logging.getLogger().setLevel(logging.INFO)
247 elif opts.verbose >= 2:
248 logging.getLogger().setLevel(logging.DEBUG)
249
250 try:
251 ret = git_number(opts.reset, not opts.no_cache, args or ['HEAD'])
252 print '\n'.join(map(str, ret))
253 return 0
254 except KeyboardInterrupt:
255 pass
M-A Ruel 2013/11/13 19:55:00 return 1
iannucci 2013/11/14 07:06:29 Done.
256
257
258 if __name__ == '__main__': # pragma: no cover
259 sys.exit(main())
OLDNEW
« git_common.py ('K') | « git_common.py ('k') | testing_support/coverage_utils.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698