Chromium Code Reviews

Side by Side Diff: git_number.py

Issue 26109002: Add git-number script to calculate generation numbers for commits. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Remove silly author line :) Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | | Annotate | Revision Log
« git_common.py ('K') | « git_common.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 import collections
7 import os
8 import optparse
9 import struct
10 import subprocess
11 import tempfile
12
13 import git_common
14 from git_common import git_hash, run_git, git_intern_f, git_tree
M-A Ruel 2013/10/21 17:56:44 Many of them are used once or twice. This doesn't
iannucci 2013/10/22 07:28:22 Donez'd
15 from git_common import git_mktree, StatusPrinter, hexlify, unhexlify, pathlify
16 from git_common import parse_committish, ScopedPool, memoize_one
17
18
# struct format of one serialized record: network byte order ('!'), a 20-byte
# string (the binary commit hash) followed by an unsigned long (the generation
# number of that commit).
CHUNK_FMT = '!20sL'
# Size in bytes of one packed CHUNK_FMT record.
CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
# Maps a hash prefix to how many new numbers were added under it. load()
# increments it; finalize() rewrites the number tree of every prefix present.
DIRTY_TREES = collections.defaultdict(int)
# Git ref whose tree holds the serialized number trees.
REF = 'refs/number/commits'
23
24 # Number of bytes to use for the prefix on our internal number structure.
25 # 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
26 # need to reimplement cache data structures to be a bit more sophisticated than
M-A Ruel 2013/10/21 17:56:44 sophisticated
iannucci 2013/10/22 07:28:22 oops. Done.
27 # dicts). 1 seems to be just right.
28 PREFIX_LEN = 1
29
30
@memoize_one
def get_number_tree(prefix_bytes):
  """Returns a dictionary of the blob contents specified by |prefix_bytes|.

  This is in the form of {<full binary ref>: <gen num> ...}

  >>> get_number_tree('\x83\xb4')
  {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}

  The blob is read with `git cat-file blob`. Anything git prints on stderr is
  captured and discarded, and if git produces no output the result is an empty
  dictionary.

  Args:
    prefix_bytes - The binary hash prefix selecting which number blob to read.
  """
  blob_ref = '%s:%s' % (REF, pathlify(prefix_bytes))

  # communicate() drains both pipes and waits for git to exit, so the child is
  # always reaped and can never block (or die) on a closed or full pipe.
  proc = subprocess.Popen(['git', 'cat-file', 'blob', blob_ref],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  raw, _ = proc.communicate()

  ret = {}
  # Floor division: a trailing partial record (if any) is ignored.
  for i in xrange(len(raw) // CHUNK_SIZE):
    commit_hash, num = struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
    ret[commit_hash] = num

  return ret
51
52
@memoize_one
def get_num(ref):
  """Looks up the generation number recorded for a commit hash.

  Args:
    ref - The commit hash to look up, in the same binary form used as keys in
        the dictionaries returned by |get_number_tree|.

  Returns:
    The generation number, or None if the hash is unknown.
  """
  number_tree = get_number_tree(ref[:PREFIX_LEN])
  return number_tree.get(ref)
58
59
def intern_number_tree(tree):
  """Transforms a number tree (in the form returned by |get_number_tree|) into
  a git blob.

  Records are packed with CHUNK_FMT and written in sorted hash order.

  Returns the git blob id as hex-encoded string.

  >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
  >>> intern_number_tree(d)
  'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
  """
  with tempfile.TemporaryFile() as blob_file:
    for commit_hash in sorted(tree):
      record = struct.pack(CHUNK_FMT, commit_hash, tree[commit_hash])
      blob_file.write(record)
    # Rewind so git_intern_f reads the records from the start of the file.
    blob_file.seek(0)
    return git_intern_f(blob_file)
75
76
def leaf_map_fn(pre_tree):
  """Converts a prefix and number tree into a git index line.

  Args:
    pre_tree - A (prefix, tree) pair. It is taken as a single argument so the
        function can be handed straight to a pool's imap().

  Returns:
    A NUL-terminated '100644 blob <blob id><TAB><path>' entry, as fed to
    `git update-index -z --index-info` by finalize().
  """
  # Unpack in the body: tuple parameters in the signature are not portable
  # (removed from the language by PEP 3113).
  pre, tree = pre_tree
  return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
80
81
def finalize(targets):
  """Saves all the generation numbers calculated so far to the git repository.

  Call this after calculating the generation numbers for |targets|. Every
  number tree listed in DIRTY_TREES is written as a blob into a private index
  file (number.idx inside the git dir). The resulting tree is then committed
  on top of REF, with each target added as an additional parent.

  Does nothing if no new numbers were calculated.

  Args:
    targets - An iterable of binary-encoded full git commit id hashes.

  Raises:
    subprocess.CalledProcessError if `git update-index` exits non-zero.
  """
  if not DIRTY_TREES:
    return

  msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values())

  # Work in a separate index file so the user's real index is never touched.
  idx = os.path.join(run_git('rev-parse', '--git-dir'), 'number.idx')
  env = os.environ.copy()
  env['GIT_INDEX_FILE'] = idx

  with StatusPrinter('Finalizing: (%%d/%d)' % len(DIRTY_TREES)) as inc:
    run_git('read-tree', REF, env=env)

    prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
    update_cmd = ['git', 'update-index', '-z', '--index-info']
    updater = subprocess.Popen(update_cmd, stdin=subprocess.PIPE, env=env)

    try:
      with ScopedPool() as leaf_pool:
        for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
          updater.stdin.write(item)
          inc()
    finally:
      # Always close the pipe and reap the child, even if feeding it failed.
      updater.stdin.close()
      updater.wait()

    # Never commit a tree built from an index update that did not succeed.
    if updater.returncode:
      raise subprocess.CalledProcessError(updater.returncode, update_cmd)

    tree_id = run_git('write-tree', env=env)

    commit_cmd = ['commit-tree', '-m', msg, '-p', git_hash(REF)]
    for t in targets:
      commit_cmd += ['-p', hexlify(t)]
    commit_cmd.append(tree_id)
    commit_id = run_git(*commit_cmd)
    run_git('update-ref', REF, commit_id)
117
118
def preload_tree(prefix):
  """Fetches the number tree for |prefix|.

  Returns:
    A (prefix, tree) tuple, where tree is the result of |get_number_tree|.
  """
  tree = get_number_tree(prefix)
  return (prefix, tree)
122
123
def all_prefixes(depth=PREFIX_LEN):
  """Yields every possible byte-string prefix that is |depth| bytes long.

  Args:
    depth - Number of bytes in each generated prefix. Any value of 1 or less
        yields the single-byte prefixes.
  """
  # 256, not 255: a byte ranges over 0..255 inclusive, so the '\xff' prefix
  # has to be generated too.
  for x in (chr(i) for i in xrange(256)):
    if depth > 1:
      for r in all_prefixes(depth - 1):
        yield x + r
    else:
      yield x
131
132
def load(targets):
  """Loads/calculates the generation numbers for targets.

  Returns immediately when every target already has a number. Otherwise it
  creates REF if it does not exist yet, lists the commits reachable from the
  targets but not from REF (parents before children), and assigns each one
  the maximum of its parents' numbers plus one (0 for a commit with no
  parents). New numbers are stored in the in-memory number trees and counted
  in DIRTY_TREES; nothing is written to the repository here.

  Args:
    targets - An iterable of binary-encoded full git commit id hashes.
  """
  if not any(get_num(t) is None for t in targets):
    return

  if git_tree(REF) is None:
    empty_tree = git_mktree({})
    root_commit = run_git(
        'commit-tree', '-m', 'Initial commit from git-number', empty_tree)
    run_git('update-ref', REF, root_commit)

  with ScopedPool() as pool:
    # Start loading the number trees in the pool while rev-list runs below.
    preload_iter = pool.imap_unordered(preload_tree, all_prefixes())

    rev_list = []

    with StatusPrinter('Loading commits: %d') as inc:
      # Curiously, buffering the list into memory seems to be the fastest
      # approach in python (as opposed to iterating over the lines in the
      # stdout as they're produced). GIL strikes again :/
      cmd = ['rev-list', '--topo-order', '--parents', '--reverse', '^' + REF]
      cmd += [hexlify(t) for t in targets]
      for line in run_git(*cmd).splitlines():
        # Each line is '<commit> <parent>...'.
        hashes = [unhexlify(tok) for tok in line.split()]
        rev_list.append((hashes[0], hashes[1:]))
        inc()

    for loaded_prefix, loaded_tree in preload_iter:
      get_number_tree.cache[loaded_prefix] = loaded_tree

  with StatusPrinter('Counting: %%d/%d' % len(rev_list)) as inc:
    for commit, parents in rev_list:
      if parents:
        num = max(get_num(p) for p in parents) + 1
      else:
        num = 0

      prefix = commit[:PREFIX_LEN]
      get_number_tree(prefix)[commit] = num
      DIRTY_TREES[prefix] += 1
      get_num.cache[commit] = num

      inc()
177
178
def main():
  """Parses the command line and prints the number of each committish.

  With --reset, deletes REF and does nothing else. Otherwise the generation
  numbers for the given committishes (HEAD when none are given) are loaded,
  printed one per line, and, unless --no-cache is given, saved to the
  repository with finalize().
  """
  usage = ('usage: %prog [options] [<committish>]\n\n'
           '<committish> defaults to HEAD')
  parser = optparse.OptionParser(usage=usage)
  parser.add_option(
      '--no-cache', action='store_true',
      help='Do not actually cache anything we calculate.')
  parser.add_option(
      '--reset', action='store_true',
      help='Reset the generation number cache and quit.')
  parser.add_option(
      '-v', '--verbose', action='count',
      help='Be verbose. Use more times for more verbosity.')
  opts, args = parser.parse_args()

  args = args or ['HEAD']

  if opts.verbose:
    git_common.VERBOSE_LEVEL = opts.verbose

  if opts.reset:
    run_git('update-ref', '-d', REF)
    return

  targets = parse_committish(*args)
  load(targets)
  for t in targets:
    print(get_num(t))
  if not opts.no_cache:
    finalize(targets)
M-A Ruel 2013/10/21 17:56:44 return 0 after.
iannucci 2013/10/22 07:28:22 Done.
206
207
# Only run when executed as a script, not when imported as a module.
if __name__ == '__main__':
  try:
    main()
  except KeyboardInterrupt:
    # Exit quietly on Ctrl-C instead of printing a traceback.
    pass
OLDNEW
« git_common.py ('K') | « git_common.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine