OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 import collections | |
7 import os | |
8 import optparse | |
9 import struct | |
10 import subprocess | |
11 import tempfile | |
12 | |
13 import git_common | |
14 from git_common import git_hash, run_git, git_intern_f, git_tree | |
M-A Ruel
2013/10/21 17:56:44
Many of them are used once or twice. This doesn't [...]
iannucci
2013/10/22 07:28:22
Donez'd
| |
15 from git_common import git_mktree, StatusPrinter, hexlify, unhexlify, pathlify | |
16 from git_common import parse_committish, ScopedPool, memoize_one | |
17 | |
18 | |
19 CHUNK_FMT = '!20sL' | |
20 CHUNK_SIZE = struct.calcsize(CHUNK_FMT) | |
21 DIRTY_TREES = collections.defaultdict(int) | |
22 REF = 'refs/number/commits' | |
23 | |
24 # Number of bytes to use for the prefix on our internal number structure. | |
25 # 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would | |
26 # need to reimplement cache data structures to be a bit more sophistocated than | |
M-A Ruel
2013/10/21 17:56:44
sophisticated
iannucci
2013/10/22 07:28:22
oops. Done.
| |
27 # dicts. 1 seems to be just right. | |
28 PREFIX_LEN = 1 | |
29 | |
30 | |
31 @memoize_one | |
32 def get_number_tree(prefix_bytes): | |
33 """Return a dictionary of the blob contents specified by |prefix_bytes|. | |
34 This is in the form of {<full binary ref>: <gen num> ...} | |
35 | |
36 >>> get_number_tree('\x83\xb4') | |
37 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} | |
38 """ | |
39 ret = {} | |
40 ref = '%s:%s' % (REF, pathlify(prefix_bytes)) | |
41 | |
42 p = subprocess.Popen(['git', 'cat-file', 'blob', ref], | |
43 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
44 p.stderr.close() | |
45 raw = buffer(p.stdout.read()) | |
M-A Ruel
2013/10/21 17:56:44
Why not use .communicate()? You risk a pipe buffer [...]
iannucci
2013/10/22 07:28:22
Yeah, this used to make more sense. Done.
| |
46 for i in xrange(len(raw) / CHUNK_SIZE): | |
47 ref, num = struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) | |
48 ret[ref] = num | |
49 | |
50 return ret | |
51 | |
52 | |
53 @memoize_one | |
54 def get_num(ref): | |
M-A Ruel
2013/10/21 17:56:44
A ref in git parlance is specifically not a hash.
iannucci
2013/10/22 07:28:22
You're right. hash is a built-in though... maybe c[...]
M-A Ruel
2013/10/24 13:23:03
commit_hash
iannucci
2013/10/25 00:52:41
Done.
| |
55 """Takes a hash and returns the generation number for it or None if the | |
56 ref is unknown.""" | |
57 return get_number_tree(ref[:PREFIX_LEN]).get(ref) | |
58 | |
59 | |
60 def intern_number_tree(tree): | |
61 """Transform a number tree (in the form returned by |get_number_tree|) into a | |
M-A Ruel
2013/10/21 17:56:44
Transforms
iannucci
2013/10/22 07:28:22
Done.
| |
62 git blob. | |
63 | |
64 Returns the git blob id as hex-encoded string. | |
65 | |
66 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} | |
67 >>> intern_number_tree(d) | |
68 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' | |
69 """ | |
70 with tempfile.TemporaryFile() as f: | |
71 for k, v in sorted(tree.iteritems()): | |
72 f.write(struct.pack(CHUNK_FMT, k, v)) | |
73 f.seek(0) | |
74 return git_intern_f(f) | |
75 | |
76 | |
77 def leaf_map_fn((pre, tree)): | |
78 """Converts a prefix and number tree into a git index line.""" | |
79 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) | |
80 | |
81 | |
82 def finalize(targets): | |
83 """After calculating the generation number for |targets|, call finalize to | |
M-A Ruel
2013/10/21 17:56:44
The "After .." part should be in the second line d[...]
iannucci
2013/10/22 07:28:22
Done.
| |
84 save all our work to the git repository. | |
85 """ | |
86 if not DIRTY_TREES: | |
87 return | |
88 | |
89 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues()) | |
90 | |
M-A Ruel
2013/10/21 17:56:44
one empty line max.
iannucci
2013/10/22 07:28:22
Done.
| |
91 | |
92 idx = os.path.join(run_git('rev-parse', '--git-dir'), 'number.idx') | |
93 env = os.environ.copy() | |
94 env['GIT_INDEX_FILE'] = idx | |
95 | |
96 with StatusPrinter('Finalizing: (%%d/%d)' % len(DIRTY_TREES)) as inc: | |
agable
2013/10/21 20:16:42
This half-formatted string is unfortunate. You cou[...]
iannucci
2013/10/22 07:28:22
k. Done.
| |
97 run_git('read-tree', REF, env=env) | |
98 | |
99 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) | |
100 updater = subprocess.Popen(['git', 'update-index', '-z', '--index-info'], | |
101 stdin=subprocess.PIPE, env=env) | |
102 | |
103 with ScopedPool() as leaf_pool: | |
104 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): | |
105 updater.stdin.write(item) | |
106 inc() | |
107 | |
108 updater.stdin.close() | |
109 updater.wait() | |
110 | |
111 commit_cmd = ['commit-tree', '-m', msg, '-p', git_hash(REF)] | |
112 for t in targets: | |
113 commit_cmd += ['-p', hexlify(t)] | |
114 commit_cmd.append(run_git('write-tree', env=env)) | |
agable
2013/10/21 20:16:42
This is confusing, especially since you're calling [...]
iannucci
2013/10/22 07:28:22
Hm... not sure I agree, but sure. Done.
| |
115 commit_id = run_git(*commit_cmd) | |
116 run_git('update-ref', REF, commit_id) | |
117 | |
118 | |
119 def preload_tree(prefix): | |
120 """Returns the prefix and parsed tree object for the specified prefix.""" | |
121 return prefix, get_number_tree(prefix) | |
122 | |
123 | |
124 def all_prefixes(depth=PREFIX_LEN): | |
125 for x in (chr(i) for i in xrange(255)): | |
126 if depth > 1: | |
127 for r in all_prefixes(depth-1): | |
128 yield x+r | |
129 else: | |
130 yield x | |
131 | |
132 | |
133 def load(targets): | |
134 """Load/calculate the generation numbers for targets. | |
135 | |
136 Args: | |
137 targets - An iterable of binary-encoded full git commit id hashes. | |
138 """ | |
139 if all(get_num(t) is not None for t in targets): | |
140 return | |
141 | |
142 if git_tree(REF) is None: | |
143 empty = git_mktree({}) | |
144 ref = run_git('commit-tree', '-m', 'Initial commit from git-number', empty) | |
145 run_git('update-ref', REF, ref) | |
146 | |
147 with ScopedPool() as pool: | |
148 preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) | |
149 | |
150 rev_list = [] | |
151 | |
152 with StatusPrinter('Loading commits: %d') as inc: | |
153 # Curiously, buffering the list into memory seems to be the fastest | |
154 # approach in python (as opposed to iterating over the lines in the | |
155 # stdout as they're produced). GIL strikes again :/ | |
156 cmd = [ | |
157 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF | |
158 ] + map(hexlify, targets) | |
159 for line in run_git(*cmd).splitlines(): | |
160 toks = map(unhexlify, line.split()) | |
agable
2013/10/21 20:16:42
tokens
iannucci
2013/10/22 07:28:22
Done.
| |
161 rev_list.append((toks[0], toks[1:])) | |
162 inc() | |
163 | |
164 for prefix, tree in preload_iter: | |
165 get_number_tree.cache[prefix] = tree | |
166 | |
167 with StatusPrinter('Counting: %%d/%d' % len(rev_list)) as inc: | |
168 for ref, pars in rev_list: | |
169 num = max(map(get_num, pars)) + 1 if pars else 0 | |
170 | |
171 prefix = ref[:PREFIX_LEN] | |
172 get_number_tree(prefix)[ref] = num | |
173 DIRTY_TREES[prefix] += 1 | |
174 get_num.cache[ref] = num | |
175 | |
176 inc() | |
177 | |
178 | |
179 def main(): | |
180 parser = optparse.OptionParser( | |
181 usage='usage: %prog [options] [<committish>]\n\n' | |
182 '<committish> defaults to HEAD') | |
183 parser.add_option('--no-cache', action='store_true', | |
184 help='Do not actually cache anything we calculate.') | |
185 parser.add_option('--reset', action='store_true', | |
186 help='Reset the generation number cache and quit.') | |
187 parser.add_option('-v', '--verbose', action='count', | |
agable
2013/10/21 20:16:42
default=0
iannucci
2013/10/22 07:28:22
Done.
| |
188 help='Be verbose. Use more times for more verbosity.') | |
189 opts, args = parser.parse_args() | |
190 | |
191 if not args: | |
192 args = ['HEAD'] | |
193 | |
194 if opts.verbose: | |
M-A Ruel
2013/10/21 17:56:44
Why do it conditionally?
iannucci
2013/10/22 07:28:22
Done.
| |
195 git_common.VERBOSE_LEVEL = opts.verbose | |
196 | |
197 if opts.reset: | |
198 run_git('update-ref', '-d', REF) | |
M-A Ruel
2013/10/21 17:56:44
I'd prefer to return 0 right after instead of usin[...]
iannucci
2013/10/22 07:28:22
Done.
| |
199 else: | |
200 targets = parse_committish(*args) | |
201 load(targets) | |
202 for t in targets: | |
203 print get_num(t) | |
204 if not opts.no_cache: | |
205 finalize(targets) | |
M-A Ruel
2013/10/21 17:56:44
return 0 after.
iannucci
2013/10/22 07:28:22
Done.
| |
206 | |
207 | |
208 if __name__ == '__main__': | |
209 try: | |
210 main() | |
M-A Ruel
2013/10/21 17:56:44
sys.exit(main())
iannucci
2013/10/22 07:28:22
Done.
| |
211 except KeyboardInterrupt: | |
M-A Ruel
2013/10/21 17:56:44
I'd prefer this to be handled inside the main().
iannucci
2013/10/22 07:28:22
Done.
| |
212 pass | |
OLD | NEW |