Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(269)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: fixes Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Usage: %prog [options] [<commitref>]*
7
8 If no <commitref>'s are supplied, it defaults to HEAD.
9
10 Calculates the generation number for one or more commits in a git repo.
11
12 Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16 This number can be used to order commits relative to each other, as long as for
17 any pair of the commits, one is an ancestor of the other.
18
19 Since calculating the generation number of a commit requires walking that
20 commit's entire history, this script caches all calculated data inside the git
21 repo that it operates on in the ref 'refs/number/commits'.
22 """
23
24 import binascii
25 import collections
26 import logging
27 import optparse
28 import os
29 import struct
30 import sys
31 import tempfile
32
33 import git_common as git
34 import subprocess2
35
36 CHUNK_FMT = '!20sL'
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38 DIRTY_TREES = collections.defaultdict(int)
39 REF = 'refs/number/commits'
40
41 # Number of bytes to use for the prefix on our internal number structure.
42 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
43 # need to reimplement cache data structures to be a bit more sophisticated than
44 # dicts. 1 seems to be just right.
45 PREFIX_LEN = 1
46
47 # Set this to 'threads' to gather coverage data while testing.
48 POOL_KIND = 'procs'
49
50
51 def pathlify(hash_prefix):
52 """Converts a binary object hash prefix into a posix path, one folder per
53 byte.
54
55 >>> pathlify('\xDE\xAD')
56 'de/ad'
57 """
58 return '/'.join('%02x' % ord(b) for b in hash_prefix)
59
60
61 @git.memoize_one(threadsafe=False)
62 def get_number_tree(prefix_bytes):
63 """Returns a dictionary of the git-number registry specified by
64 |prefix_bytes|.
65
66 This is in the form of {<full binary ref>: <gen num> ...}
67
68 >>> get_number_tree('\x83\xb4')
69 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
70 """
71 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
72
73 try:
74 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
75 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
76 for i in xrange(len(raw) / CHUNK_SIZE))
77 except subprocess2.CalledProcessError:
78 return {}
79
80
81 @git.memoize_one(threadsafe=False)
82 def get_num(commit_hash):
83 """Takes a commit hash and returns the generation number for it or None if
84 the commit hash doesn't have a computed value yet."""
85 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
86
87
88 def clear_caches(on_disk=False):
89 """Clears in-process caches for e.g. unit testing."""
90 get_number_tree.clear()
91 get_num.clear()
92 if on_disk:
93 git.run('update-ref', '-d', REF)
94
95
96 def intern_number_tree(tree):
97 """Transforms a number tree (in the form returned by |get_number_tree|) into
98 a git blob.
99
100 Returns the git blob id as hex-encoded string.
101
102 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
103 >>> intern_number_tree(d)
104 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
105 """
106 with tempfile.TemporaryFile() as f:
107 for k, v in sorted(tree.iteritems()):
108 f.write(struct.pack(CHUNK_FMT, k, v))
109 f.seek(0)
110 return git.intern_f(f)
111
112
113 def leaf_map_fn((pre, tree)):
114 """Converts a prefix and number tree into a git index line."""
115 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
116
117
118 def finalize(targets):
119 """Saves all cache data to the git repository.
120
121 After calculating the generation number for |targets|, call finalize() to
122 save all the work to the git repository.
123
124 This in particular saves the trees referred to by DIRTY_TREES.
125 """
126 if not DIRTY_TREES:
127 return
128
129 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
130
131 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
132 env = os.environ.copy()
133 env['GIT_INDEX_FILE'] = idx
134
135 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
136 with git.ProgressPrinter(progress_message) as inc:
137 git.run('read-tree', REF, env=env)
138
139 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
140 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
141 stdin=subprocess2.PIPE, env=env)
142
143 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
144 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
145 updater.stdin.write(item)
146 inc()
147
148 updater.stdin.close()
149 updater.wait()
150 assert updater.returncode == 0
151
152 tree_id = git.run('write-tree', env=env)
153 commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
154 for t in targets:
155 commit_cmd += ['-p', binascii.hexlify(t)]
156 commit_cmd.append(tree_id)
157 commit_hash = git.run(*commit_cmd)
158 git.run('update-ref', REF, commit_hash)
159 DIRTY_TREES.clear()
160
161
162 def preload_tree(prefix):
163 """Returns the prefix and parsed tree object for the specified prefix."""
164 return prefix, get_number_tree(prefix)
165
166
167 def all_prefixes(depth=PREFIX_LEN):
168 for x in (chr(i) for i in xrange(255)):
169 # This isn't covered because PREFIX_LEN currently == 1
170 if depth > 1: # pragma: no cover
171 for r in all_prefixes(depth - 1):
172 yield x + r
173 else:
174 yield x
175
176
177 def load_generation_numbers(targets):
178 """Populates the caches of get_num and get_number_tree so they contain
179 the results for |targets|.
180
181 Loads cached numbers from disk, and calculates missing numbers if one or
182 more of |targets| is newer than the cached calculations.
183
184 Args:
185 targets - An iterable of binary-encoded full git commit hashes.
186 """
187 # In case they pass us a generator, listify targets.
188 targets = list(targets)
189
190 if all(get_num(t) is not None for t in targets):
191 return
192
193 if git.tree(REF) is None:
194 empty = git.mktree({})
195 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
196 empty)
197 git.run('update-ref', REF, commit_hash)
198
199 with git.ScopedPool(kind=POOL_KIND) as pool:
200 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
201
202 rev_list = []
203
204 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
205 # Curiously, buffering the list into memory seems to be the fastest
206 # approach in python (as opposed to iterating over the lines in the
207 # stdout as they're produced). GIL strikes again :/
208 cmd = [
209 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF
210 ] + map(binascii.hexlify, targets)
211 for line in git.run(*cmd).splitlines():
212 tokens = map(binascii.unhexlify, line.split())
213 rev_list.append((tokens[0], tokens[1:]))
214 inc()
215
216 get_number_tree.update(preload_iter)
217
218 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
219 for commit_hash, pars in rev_list:
220 num = max(map(get_num, pars)) + 1 if pars else 0
221
222 prefix = commit_hash[:PREFIX_LEN]
223 get_number_tree(prefix)[commit_hash] = num
224 DIRTY_TREES[prefix] += 1
225 get_num.set(commit_hash, num)
226
227 inc()
228
229
230 def main(): # pragma: no cover
231 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
232 parser.add_option('--no-cache', action='store_true',
233 help='Do not actually cache anything we calculate.')
234 parser.add_option('--reset', action='store_true',
235 help='Reset the generation number cache and quit.')
236 parser.add_option('-v', '--verbose', action='count', default=0,
237 help='Be verbose. Use more times for more verbosity.')
238 opts, args = parser.parse_args()
239
240 if opts.verbose == 1:
241 logging.getLogger().setLevel(logging.INFO)
M-A Ruel 2013/11/15 02:51:29 why not use: levels = [logging.ERROR, logging.INFO
iannucci 2013/11/15 04:16:05 Done (except it should be min() :))
242 elif opts.verbose >= 2:
243 logging.getLogger().setLevel(logging.DEBUG)
244
245 try:
246 if opts.reset:
247 clear_caches(on_disk=True)
248 return
249
250 targets = git.parse_commitrefs(*(args or ['HEAD']))
M-A Ruel 2013/11/15 02:51:29 You could do management here instead; if not targe
iannucci 2013/11/15 04:16:05 Did both
251 load_generation_numbers(targets)
252 if not opts.no_cache:
253 finalize(targets)
254
255 print '\n'.join(str(get_num(x)) for x in targets)
256 return 0
257 except KeyboardInterrupt:
258 return 1
259
260
261 if __name__ == '__main__': # pragma: no cover
262 sys.exit(main())
OLDNEW
« git_common.py ('K') | « git_common.py ('k') | testing_support/coverage_utils.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698