Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(510)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Usage: %prog [options] [<commitref>]*
7
8 If no <commitref>'s are supplied, it defaults to HEAD.
9
10 Calculates the generation number for one or more commits in a git repo.
11
12 Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16 This number can be used to order commits relative to each other, as long as for
17 any pair of the commits, one is an ancestor of the other.
18
19 Since calculating the generation number of a commit requires walking that
20 commit's entire history, this script caches all calculated data inside the git
21 repo that it operates on in the ref 'refs/number/commits'.
22 """
23
24 import binascii
25 import collections
26 import logging
27 import optparse
28 import os
29 import struct
30 import subprocess2
31 import sys
32 import tempfile
33
34 import git_common as git
35
36 CHUNK_FMT = '!20sL'
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38 DIRTY_TREES = collections.defaultdict(int)
39 REF = 'refs/number/commits'
40
41 # Number of bytes to use for the prefix on our internal number structure.
42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
43 # need to reimplement cache data structures to be a bit more sophisticated than
44 # dicts. 1 seems to be just right.
45 PREFIX_LEN = 1
46
47 # Set this to 'threads' to gather coverage data while testing.
48 POOL_KIND = 'procs'
49
50
51 def pathlify(hash_prefix):
52 """Converts a binary object hash prefix into a posix path, one folder per
53 byte.
54
55 >>> pathlify('\xDE\xAD')
56 'de/ad'
57 """
58 return '/'.join('%02x' % ord(b) for b in hash_prefix)
59
60
61 @git.memoize_one(threadsafe=False)
62 def get_number_tree(prefix_bytes):
63 """Returns a dictionary of the git-number registry specified by
64 |prefix_bytes|.
65
66 This is in the form of {<full binary ref>: <gen num> ...}
67
68 >>> get_number_tree('\x83\xb4')
69 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
70 """
71 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
72
73 try:
74 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
75 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
76 for i in xrange(len(raw) / CHUNK_SIZE))
77 except subprocess2.CalledProcessError:
78 return {}
79
80
81 @git.memoize_one(threadsafe=False)
82 def get_num(commit_hash):
83 """Takes a commit hash and returns the generation number for it or None if
84 the commit hash doesn't have a computed value yet."""
85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
86
87
88 def clear_caches(on_disk=False):
89 """Clears in-process caches for e.g. unit testing."""
90 get_number_tree.clear()
91 get_num.clear()
92 if on_disk:
93 git.run('update-ref', '-d', REF)
94
95
96 def intern_number_tree(tree):
97 """Transforms a number tree (in the form returned by |get_number_tree|) into
98 a git blob.
99
100 Returns the git blob id as hex-encoded string.
101
102 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
103 >>> intern_number_tree(d)
104 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
105 """
106 with tempfile.TemporaryFile() as f:
107 for k, v in sorted(tree.iteritems()):
108 f.write(struct.pack(CHUNK_FMT, k, v))
109 f.seek(0)
110 return git.intern_f(f)
M-A Ruel 2013/11/14 17:27:24 I don't think it'll work on Windows. This is becau
iannucci 2013/11/15 02:12:58 Totally works :)
111
112
113 def leaf_map_fn((pre, tree)):
114 """Converts a prefix and number tree into a git index line."""
115 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
116
117
118 def finalize(targets):
119 """Saves all cache data to the git repository.
120
121 After calculating the generation number for |targets|, call finalize() to
122 save all the work to the git repository.
123
124 This in particular saves the trees referred to by DIRTY_TREES.
125 """
126 if not DIRTY_TREES:
127 return
128
129 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
130
131 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
132 env = os.environ.copy()
133 env['GIT_INDEX_FILE'] = idx
134
135 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
136 with git.ProgressPrinter(progress_message) as inc:
137 git.run('read-tree', REF, env=env)
138
139 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
140 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
141 stdin=subprocess2.PIPE, env=env)
142
143 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
144 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
145 updater.stdin.write(item)
146 inc()
147
148 updater.stdin.close()
149 updater.wait()
150 assert updater.returncode == 0
151
152 tree_id = git.run('write-tree', env=env)
153 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
154 for t in targets:
155 commit_cmd += ['-p', binascii.hexlify(t)]
156 commit_cmd.append(tree_id)
157 commit_hash = git.run(*commit_cmd)
158 git.run('update-ref', REF, commit_hash)
159 DIRTY_TREES.clear()
160
161
162 def preload_tree(prefix):
163 """Returns the prefix and parsed tree object for the specified prefix."""
164 return prefix, get_number_tree(prefix)
165
166
167 def all_prefixes(depth=PREFIX_LEN):
168 for x in (chr(i) for i in xrange(255)):
169 # This isn't covered because PREFIX_LEN currently == 1
170 if depth > 1: # pragma: no cover
171 for r in all_prefixes(depth - 1):
172 yield x + r
173 else:
174 yield x
175
176
177 def load_generation_numbers(targets):
178 """Populates the caches of get_num and get_number_tree so they contain
179 the results for |targets|.
180
181 Loads cached numbers from disk, and calculates missing numbers if one or
182 more of |targets| is newer than the cached calculations.
183
184 Args:
185 targets - An iterable of binary-encoded full git commit hashes.
186 """
187 if all(get_num(t) is not None for t in targets):
M-A Ruel 2013/11/14 17:27:24 This is somewhat dangerous. If someone uses a gene
iannucci 2013/11/15 02:12:58 hm, good point. I'll list-ify it.
188 return
189
190 if git.tree(REF) is None:
191 empty = git.mktree({})
192 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
193 empty)
194 git.run('update-ref', REF, commit_hash)
195
196 with git.ScopedPool(kind=POOL_KIND) as pool:
197 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
198
199 rev_list = []
200
201 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
202 # Curiously, buffering the list into memory seems to be the fastest
203 # approach in python (as opposed to iterating over the lines in the
204 # stdout as they're produced). GIL strikes again :/
205 cmd = [
206 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
207 ] + map(binascii.hexlify, targets)
208 for line in git.run(*cmd).splitlines():
209 tokens = map(binascii.unhexlify, line.split())
210 rev_list.append((tokens[0], tokens[1:]))
211 inc()
212
213 get_number_tree.update(preload_iter)
214
215 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
216 for commit_hash, pars in rev_list:
217 num = max(map(get_num, pars)) + 1 if pars else 0
218
219 prefix = commit_hash[:PREFIX_LEN]
220 get_number_tree(prefix)[commit_hash] = num
221 DIRTY_TREES[prefix] += 1
222 get_num.set(commit_hash, num)
223
224 inc()
225
226
227 def main(): # pragma: no cover
228 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
229 parser.add_option('--no-cache', action='store_true',
230 help='Do not actually cache anything we calculate.')
231 parser.add_option('--reset', action='store_true',
232 help='Reset the generation number cache and quit.')
233 parser.add_option('-v', '--verbose', action='count', default=0,
234 help='Be verbose. Use more times for more verbosity.')
235 opts, args = parser.parse_args()
236
237 if opts.verbose == 1:
238 logging.getLogger().setLevel(logging.INFO)
239 elif opts.verbose >= 2:
240 logging.getLogger().setLevel(logging.DEBUG)
241
242 try:
243 if opts.reset:
244 clear_caches(on_disk=True)
245 return
246
247 targets = git.parse_commitrefs(*(args or ['HEAD']))
248 load_generation_numbers(targets)
249 if not opts.no_cache:
250 finalize(targets)
251
252 print '\n'.join(str(get_num(x)) for x in targets)
253 return 0
254 except KeyboardInterrupt:
255 return 1
256
257
258 if __name__ == '__main__': # pragma: no cover
259 sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698