Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(810)

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Add version checking for coverage module Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « git_common.py ('k') | testing_support/coverage_utils.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Usage: %prog [options] [<commitref>]*
7
8 If no <commitref>'s are supplied, it defaults to HEAD.
9
10 Calculates the generation number for one or more commits in a git repo.
11
12 Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16 This number can be used to order commits relative to each other, as long as for
17 any pair of the commits, one is an ancestor of the other.
18
19 Since calculating the generation number of a commit requires walking that
20 commit's entire history, this script caches all calculated data inside the git
21 repo that it operates on in the ref 'refs/number/commits'.
22 """
23
24 import binascii
25 import collections
26 import logging
27 import optparse
28 import os
29 import struct
30 import sys
31 import tempfile
32
33 import git_common as git
34 import subprocess2
35
36 CHUNK_FMT = '!20sL'
37 CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38 DIRTY_TREES = collections.defaultdict(int)
39 REF = 'refs/number/commits'
40
41 # Number of bytes to use for the prefix on our internal number structure.
42 # 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
43 # need to reimplement cache data structures to be a bit more sophisticated than
44 # dicts). 1 seems to be just right.
45 PREFIX_LEN = 1
46
47 # Set this to 'threads' to gather coverage data while testing.
48 POOL_KIND = 'procs'
49
50
def pathlify(hash_prefix):
  """Renders a binary object hash prefix as a posix path.

  Each byte of |hash_prefix| becomes one folder, written as two lowercase hex
  digits.

  >>> pathlify('\xDE\xAD')
  'de/ad'
  """
  folders = []
  for byte in hash_prefix:
    folders.append(format(ord(byte), '02x'))
  return '/'.join(folders)
59
60
@git.memoize_one(threadsafe=False)
def get_number_tree(prefix_bytes):
  """Loads the git-number registry stored under |prefix_bytes|.

  The registry is read from the blob at REF:<pathlify(prefix_bytes)> and is
  returned in the form {<full binary ref>: <gen num> ...}. If git cannot
  produce that blob (the command raises CalledProcessError), an empty dict is
  returned instead.

  >>> get_number_tree('\x83\xb4')
  {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
  """
  blob_ref = '%s:%s' % (REF, pathlify(prefix_bytes))

  try:
    raw = buffer(git.run('cat-file', 'blob', blob_ref, autostrip=False))
  except subprocess2.CalledProcessError:
    return {}

  # The blob is a flat sequence of fixed-size CHUNK_FMT records; any trailing
  # partial record is ignored.
  number_tree = {}
  for index in xrange(len(raw) // CHUNK_SIZE):
    commit_hash, num = struct.unpack_from(CHUNK_FMT, raw, index * CHUNK_SIZE)
    number_tree[commit_hash] = num
  return number_tree
79
80
@git.memoize_one(threadsafe=False)
def get_num(commit_hash):
  """Looks up the generation number recorded for |commit_hash|.

  Returns None when the generation number for this commit hasn't been
  calculated yet (see load_generation_numbers()).
  """
  prefix = commit_hash[:PREFIX_LEN]
  number_tree = get_number_tree(prefix)
  return number_tree.get(commit_hash)
89
90
def clear_caches(on_disk=False):
  """Empties the in-process caches, e.g. for unit testing.

  Args:
    on_disk - When true, the REF ref is deleted from the git repo as well.
  """
  for memoized in (get_number_tree, get_num):
    memoized.clear()
  if not on_disk:
    return
  git.run('update-ref', '-d', REF)
97
98
def intern_number_tree(tree):
  """Serializes a number tree (as returned by |get_number_tree|) into a git
  blob.

  Entries are written in sorted order as packed CHUNK_FMT records.

  Returns the git blob id as hex-encoded string.

  >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
  >>> intern_number_tree(d)
  'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
  """
  records = [struct.pack(CHUNK_FMT, commit_hash, num)
             for commit_hash, num in sorted(tree.iteritems())]
  with tempfile.TemporaryFile() as scratch:
    scratch.writelines(records)
    # Rewind so git.intern_f reads the file from the start.
    scratch.seek(0)
    return git.intern_f(scratch)
114
115
def leaf_map_fn(pre_and_tree):
  """Converts a prefix and number tree into a git index line.

  Takes a single (prefix, tree) 2-tuple so that it can be mapped directly over
  a sequence of such pairs. The pair is unpacked in the body rather than in
  the signature, because tuple parameters were removed from the language in
  Python 3 (PEP 3113); callers passing one 2-tuple are unaffected.

  Args:
    pre_and_tree - A (binary hash prefix, number tree dict) pair.

  Returns a NUL-terminated index record naming the blob that holds the
  serialized tree and the path derived from the prefix.
  """
  pre, tree = pre_and_tree
  return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
119
120
def finalize(targets):
  """Saves all cache data to the git repository.

  After calculating the generation number for |targets|, call finalize() to
  save all the work to the git repository.

  This in particular saves the trees referred to by DIRTY_TREES, commits the
  result on top of REF (with each of |targets| as an additional parent) and
  then clears DIRTY_TREES. Does nothing if DIRTY_TREES is empty.

  Args:
    targets - An iterable of binary-encoded git commit hashes; each one is
              hex-encoded before being handed to git.

  Raises:
    RuntimeError - if `git update-index` exits with a non-zero status.
  """
  if not DIRTY_TREES:
    return

  msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())

  # All index operations below go through a dedicated index file inside the
  # git dir, selected via GIT_INDEX_FILE.
  idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
  env = os.environ.copy()
  env['GIT_INDEX_FILE'] = idx

  progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
  with git.ProgressPrinter(progress_message) as inc:
    git.run('read-tree', REF, env=env)

    prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
    updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
                                stdin=subprocess2.PIPE, env=env)

    # Blobs are interned by the pool; the resulting index lines are streamed
    # to the single update-index process as they become available.
    with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
      for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
        updater.stdin.write(item)
        inc()

    updater.stdin.close()
    updater.wait()
    # Checked explicitly rather than with `assert`: asserts are stripped under
    # `python -O`, and a failed update-index must never be committed to REF.
    if updater.returncode != 0:
      raise RuntimeError(
          'git update-index failed with exit code %d' % updater.returncode)

    tree_id = git.run('write-tree', env=env)
    commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
    for t in targets:
      commit_cmd.extend(['-p', binascii.hexlify(t)])
    commit_cmd.append(tree_id)
    commit_hash = git.run(*commit_cmd)
    git.run('update-ref', REF, commit_hash)
  DIRTY_TREES.clear()
163
164
def preload_tree(prefix):
  """Loads the number tree for |prefix| and returns it as a (prefix, tree)
  pair."""
  tree = get_number_tree(prefix)
  return prefix, tree
168
169
def all_prefixes(depth=PREFIX_LEN):
  """Yields every binary hash prefix that is |depth| bytes long.

  Prefixes are produced in ascending byte order, from '\\x00' through '\\xff'
  at each position. A |depth| below 1 is treated like 1.

  Args:
    depth - Number of bytes in each generated prefix.
  """
  # 256, not 255: a byte has 256 possible values, and stopping at 255 would
  # silently leave out every prefix containing '\xff'.
  for x in (chr(i) for i in xrange(256)):
    # This isn't covered because PREFIX_LEN currently == 1
    if depth > 1:  # pragma: no cover
      for r in all_prefixes(depth - 1):
        yield x + r
    else:
      yield x
178
179
def load_generation_numbers(targets):
  """Populates the caches of get_num and get_number_tree so they contain
  the results for |targets|.

  Loads cached numbers from disk, and calculates missing numbers if one or
  more of |targets| is newer than the cached calculations. Every newly
  calculated number is recorded in DIRTY_TREES so that finalize() can save it.

  Args:
    targets - An iterable of binary-encoded full git commit hashes.
  """
  # |targets| is iterated more than once below, so materialize it in case the
  # caller handed us a generator.
  targets = list(targets)

  if not any(get_num(t) is None for t in targets):
    return

  if git.tree(REF) is None:
    # REF has no tree yet: seed it with a commit of the empty tree.
    empty_tree = git.mktree({})
    root_commit = git.run('commit-tree', '-m', 'Initial commit from git-number',
                          empty_tree)
    git.run('update-ref', REF, root_commit)

  with git.ScopedPool(kind=POOL_KIND) as pool:
    preload_iter = pool.imap_unordered(preload_tree, all_prefixes())

    rev_list = []

    with git.ProgressPrinter('Loading commits: %(count)d') as inc:
      # Curiously, buffering the list into memory seems to be the fastest
      # approach in python (as opposed to iterating over the lines in the
      # stdout as they're produced). GIL strikes again :/
      cmd = ['rev-list', '--topo-order', '--parents', '--reverse', '^' + REF]
      cmd.extend(binascii.hexlify(t) for t in targets)
      for line in git.run(*cmd).splitlines():
        # With --parents each line is: <commit> <parent> <parent> ...
        hashes = [binascii.unhexlify(token) for token in line.split()]
        rev_list.append((hashes[0], hashes[1:]))
        inc()

    get_number_tree.update(preload_iter)

  with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
    for commit_hash, parents in rev_list:
      if parents:
        num = max(get_num(parent) for parent in parents) + 1
      else:
        num = 0

      prefix = commit_hash[:PREFIX_LEN]
      get_number_tree(prefix)[commit_hash] = num
      DIRTY_TREES[prefix] += 1
      get_num.set(commit_hash, num)

      inc()
231
232
def main():  # pragma: no cover
  """Command-line entry point.

  Parses the options, then either resets the on-disk cache (--reset) or
  calculates and prints one generation number per requested commitref,
  defaulting to HEAD. Results are saved with finalize() unless --no-cache is
  given.

  Returns 0 on success, 1 if interrupted with KeyboardInterrupt, and None
  after a --reset.
  """
  parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
  parser.add_option('--no-cache', action='store_true',
                    help='Do not actually cache anything we calculate.')
  parser.add_option('--reset', action='store_true',
                    help='Reset the generation number cache and quit.')
  parser.add_option('-v', '--verbose', action='count', default=0,
                    help='Be verbose. Use more times for more verbosity.')
  opts, args = parser.parse_args()

  # Each -v moves one step down this list; extra -v's stay at DEBUG.
  levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  verbosity = min(opts.verbose, len(levels) - 1)
  logging.basicConfig(level=levels[verbosity])

  try:
    if opts.reset:
      clear_caches(on_disk=True)
      return

    commitrefs = args or ['HEAD']
    try:
      targets = git.parse_commitrefs(*commitrefs)
    except git.BadCommitRefException as e:
      parser.error(e)

    load_generation_numbers(targets)
    if not opts.no_cache:
      finalize(targets)

    print('\n'.join(str(get_num(t)) for t in targets))
    return 0
  except KeyboardInterrupt:
    return 1


if __name__ == '__main__':  # pragma: no cover
  sys.exit(main())
OLDNEW
« no previous file with comments | « git_common.py ('k') | testing_support/coverage_utils.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698