OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import atexit | |
6 import collections | |
7 import copy | |
8 import datetime | |
9 import hashlib | |
10 import os | |
11 import shutil | |
12 import subprocess | |
13 import tempfile | |
14 import unittest | |
15 | |
16 | |
def git_hash_data(data, typ='blob'):
  """Calculate the git-style SHA1 for some data.

  Git names an object by the SHA1 of a header, '<type> <size in bytes>'
  followed by a NUL byte, concatenated with the raw content.

  Only supports 'blob' type data at the moment.

  Args:
    data (bytes or text) - The blob content. Text is encoded as UTF-8 first so
        that the size recorded in the header is a byte count.
    typ (str) - The git object type. Must be 'blob'.

  Returns:
    The 40 character lowercase hex SHA1 of the object.

  Throws an AssertionError if typ is anything other than 'blob'.
  """
  assert typ == 'blob', 'Only support blobs for now'
  if not isinstance(data, bytes):
    data = data.encode('utf-8')
  # Build the header as bytes so the digest input is never implicitly
  # converted between text and bytes.
  header = ('%s %d\0' % (typ, len(data))).encode('ascii')
  return hashlib.sha1(header + data).hexdigest()
24 | |
25 | |
try:
  # The container ABCs live in collections.abc on Python 3; the aliases in the
  # top-level collections namespace were removed in Python 3.10.
  from collections.abc import MutableSet as _MutableSet
except ImportError:
  _MutableSet = collections.MutableSet


class OrderedSet(_MutableSet):
  """A set which iterates over its keys in the order they were first added.

  Adapted from http://code.activestate.com/recipes/576694/

  Keys are kept in a circular doubly linked list of [key, prev, next] nodes
  anchored at a sentinel node (self.end). self.data maps each key to its node,
  so membership tests, add() and discard() never have to scan the list.
  """

  def __init__(self, iterable=None):
    """Creates the set, optionally adding every item of |iterable| in order."""
    self.end = end = []
    end += [None, end, end]         # sentinel node for doubly linked list
    self.data = {}                  # key --> [key, prev, next]
    if iterable is not None:
      self |= iterable

  def __contains__(self, key):
    return key in self.data

  def __eq__(self, other):
    """Compares order-sensitively against another OrderedSet, and as a plain
    set against any other iterable.

    Returns NotImplemented for operands which cannot be turned into a set
    (e.g. non-iterables), so that `==` evaluates to False instead of raising.
    """
    if isinstance(other, OrderedSet):
      return len(self) == len(other) and list(self) == list(other)
    try:
      return set(self) == set(other)
    except TypeError:
      return NotImplemented

  def __len__(self):
    return len(self.data)

  def __iter__(self):
    """Yields the keys from oldest to newest."""
    end = self.end
    curr = end[2]
    while curr is not end:
      yield curr[0]
      curr = curr[2]

  def __repr__(self):
    if not self:
      return '%s()' % (self.__class__.__name__,)
    return '%s(%r)' % (self.__class__.__name__, list(self))

  def __reversed__(self):
    """Yields the keys from newest to oldest."""
    end = self.end
    curr = end[1]
    while curr is not end:
      yield curr[0]
      curr = curr[1]

  def add(self, key):
    """Appends |key| to the end of the set; a key already present keeps its
    original position."""
    if key not in self.data:
      end = self.end
      curr = end[1]
      # Splice the new node in between the current last node and the sentinel.
      curr[2] = end[1] = self.data[key] = [key, curr, end]

  def difference_update(self, *others):
    """Discards every item found in any of the iterables in |others|."""
    for other in others:
      for i in other:
        self.discard(i)

  def discard(self, key):
    """Removes |key| if it is present; does nothing otherwise."""
    if key in self.data:
      key, prev, nxt = self.data.pop(key)
      prev[2] = nxt
      nxt[1] = prev

  def pop(self, last=True):  # pylint: disable=W0221
    """Removes and returns the newest key, or the oldest one if |last| is
    False.

    Raises KeyError if the set is empty.
    """
    if not self:
      raise KeyError('set is empty')
    key = self.end[1][0] if last else self.end[2][0]
    self.discard(key)
    return key
88 | |
89 | |
class GitRepoSchema(object):
  r"""A declarative git testing repo.

  Pass a schema to __init__ in the form of:
     A B C D
     B E D

  This is the repo

     A - B - C - D
          \ E /

  Whitespace doesn't matter. Each line is a declaration of which commits come
  before which other commits.

  Every commit gets a tag 'tag_%(commit)s'
  Every unique terminal commit gets a branch 'branch_%(commit)s'
  Last commit in First line is the branch 'master'
  Root commits get a ref 'root_%(commit)s'

  Timestamps are in topo order, earlier commits (as indicated by their presence
  in the schema) get earlier timestamps. Stamps start at the Unix Epoch, and
  increment by 1 day each.
  """
  COMMIT = collections.namedtuple('COMMIT', 'name parents is_branch is_root')

  def __init__(self, repo_schema='',
               content_fn=lambda v: {v: {'data': v}}):
    """Builds a new GitRepoSchema.

    Args:
      repo_schema (str) - Initial schema for this repo. See class docstring for
        info on the schema format.
      content_fn ((commit_name) -> commit_data) - A function which will be
        lazily called to obtain data for each commit. The results of this
        function are cached (i.e. it will never be called twice for the same
        commit_name). See the docstring on the GitRepo class for the format of
        the data returned by this function.
    """
    self.master = None
    self.par_map = {}     # commit name --> set of its parents' names
    self.data_cache = {}  # commit name --> cached result of content_fn
    self.content_fn = content_fn
    self.add_commits(repo_schema)

  def walk(self):
    """(Generator) Walks the repo schema from roots to tips.

    Generates GitRepoSchema.COMMIT objects for each commit. Commits which
    become ready in the same round are generated in sorted name order.

    Throws an AssertionError if it detects a cycle.
    """
    is_root = True
    # Work on a copy; parents are stripped from it as they are generated.
    par_map = copy.deepcopy(self.par_map)

    # A commit which is somebody's parent is not the tip of a branch.
    has_children = set()
    for parents in self.par_map.values():
      has_children.update(parents)

    while par_map:
      empty_keys = set(k for k, v in par_map.items() if not v)
      if not empty_keys:
        # Raised explicitly (rather than `assert False`) so that running under
        # `python -O` cannot turn a cycle into an endless loop.
        raise AssertionError('Cycle detected! %s' % par_map)
      for k in sorted(empty_keys):
        yield self.COMMIT(k, self.par_map[k], k not in has_children, is_root)
        del par_map[k]
      for v in par_map.values():
        v.difference_update(empty_keys)
      # Only the first round consists of commits with no parents at all.
      is_root = False

  def add_commits(self, schema):
    """Adds more commits from a schema into the existing Schema.

    The last commit of the first non-blank line ever added becomes
    self.master.

    Args:
      schema (str) - See class docstring for info on schema format.

    Throws an AssertionError if it detects a cycle.
    """
    for commits in (l.split() for l in schema.splitlines() if l.strip()):
      parent = None
      for commit in commits:
        if commit not in self.par_map:
          self.par_map[commit] = OrderedSet()
        if parent is not None:
          self.par_map[commit].add(parent)
        parent = commit
      if parent and self.master is None:
        self.master = parent
    for _ in self.walk():  # This will throw if there are any cycles.
      pass

  def reify(self):
    """Returns a real GitRepo for this GitRepoSchema"""
    return GitRepo(self)

  def data_for(self, commit):
    """Obtains the data for a commit.

    See the docstring on the GitRepo class for the format of the returned data.

    Caches the result on this GitRepoSchema instance.
    """
    if commit not in self.data_cache:
      self.data_cache[commit] = self.content_fn(commit)
    return self.data_cache[commit]
193 | |
194 | |
class GitRepo(object):
  """Creates a real git repo for a GitRepoSchema.

  Obtains schema and content information from the GitRepoSchema.

  The format for the commit data supplied by GitRepoSchema.data_for is:
    {
      SPECIAL_KEY: special_value,
      ...
      "path/to/some/file": { 'data': "some data content for this file",
                             'mode': 0o755 },
      ...
    }

  The SPECIAL_KEYs are the following attributes of the GitRepo class:
    * AUTHOR_NAME
    * AUTHOR_EMAIL
    * AUTHOR_DATE - must be a datetime.datetime instance
    * COMMITTER_NAME
    * COMMITTER_EMAIL
    * COMMITTER_DATE - must be a datetime.datetime instance

  For file content, if 'data' is None, then this commit will `git rm` that file.
  """
  # Every repo is created underneath this directory, which is removed when the
  # interpreter exits.
  BASE_TEMP_DIR = tempfile.mkdtemp(suffix='base', prefix='git_repo')
  atexit.register(shutil.rmtree, BASE_TEMP_DIR)

  # Singleton objects to specify specific data in a commit dictionary.
  AUTHOR_NAME = object()
  AUTHOR_EMAIL = object()
  AUTHOR_DATE = object()
  COMMITTER_NAME = object()
  COMMITTER_EMAIL = object()
  COMMITTER_DATE = object()

  # Keys of a commit dictionary which describe the commit itself, not a file.
  _SPECIAL_KEYS = (AUTHOR_NAME, AUTHOR_EMAIL, AUTHOR_DATE,
                   COMMITTER_NAME, COMMITTER_EMAIL, COMMITTER_DATE)

  DEFAULT_AUTHOR_NAME = 'Author McAuthorly'
  DEFAULT_AUTHOR_EMAIL = 'author@example.com'
  DEFAULT_COMMITTER_NAME = 'Charles Committish'
  DEFAULT_COMMITTER_EMAIL = 'commitish@example.com'

  COMMAND_OUTPUT = collections.namedtuple('COMMAND_OUTPUT', 'retcode stdout')

  def __init__(self, schema):
    """Makes new GitRepo.

    Automatically creates a temp folder under GitRepo.BASE_TEMP_DIR. It's
    recommended that you clean this repo up by calling nuke() on it, but if not,
    GitRepo will automatically clean up all allocated repos at the exit of the
    program (assuming a normal exit like with sys.exit)

    Args:
      schema - An instance of GitRepoSchema
    """
    self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR)
    self.commit_map = {}  # schema commit name --> commit hash
    # Next default timestamp; advanced by one day every time it is used.
    self._date = datetime.datetime(1970, 1, 1)

    self.git('init')
    for commit in schema.walk():
      self._add_schema_commit(commit, schema.data_for(commit.name))
    if schema.master:
      self.git('update-ref', 'master', self[schema.master])

  def __getitem__(self, commit_name):
    """Gets the hash of a commit by its schema name.

    >>> r = GitRepo(GitRepoSchema('A B C'))
    >>> r['B']
    '7381febe1da03b09da47f009963ab7998a974935'
    """
    return self.commit_map[commit_name]

  def _commit_env(self, data):
    """Builds the GIT_AUTHOR_*/GIT_COMMITTER_* environment for one commit.

    Values come from the special keys in |data| when present. Otherwise names
    and emails fall back to the DEFAULT_* class attributes, and each date takes
    the repo's running timestamp, which then advances by one day.
    """
    env = {}
    for prefix in ('AUTHOR', 'COMMITTER'):
      for suffix in ('NAME', 'EMAIL', 'DATE'):
        singleton = '%s_%s' % (prefix, suffix)
        key = getattr(self, singleton)
        if key in data:
          val = data[key]
        elif suffix == 'DATE':
          val = self._date
          self._date += datetime.timedelta(days=1)
        else:
          val = getattr(self, 'DEFAULT_%s' % singleton)
        env['GIT_%s' % singleton] = str(val)
    return env

  def _stage_file(self, fname, file_data):
    """Writes, chmods or removes one file and stages the result.

    Args:
      fname (str) - Path of the file, relative to the repo root.
      file_data (dict) - May hold 'data' (the content, or None to `git rm` the
        file) and 'mode' (passed to os.chmod).
    """
    path = os.path.join(self.repo_path, fname)
    if 'data' in file_data:
      content = file_data['data']
      if content is None:
        # `git rm` already stages the deletion; there is nothing left to
        # chmod or add.
        self.git('rm', fname)
        return
      pardir = os.path.dirname(path)
      if not os.path.exists(pardir):
        os.makedirs(pardir)
      # The file is opened in binary mode, so text has to be encoded first.
      if not isinstance(content, bytes):
        content = content.encode('utf-8')
      with open(path, 'wb') as f:
        f.write(content)

    mode = file_data.get('mode')
    if mode:
      os.chmod(path, mode)

    self.git('add', fname)

  def _add_schema_commit(self, commit, data):
    """Creates the git commit for one GitRepoSchema.COMMIT.

    Checks out the first parent (merging in any others without committing), or
    starts an orphan 'root_<name>' branch when there are no parents; applies
    the file changes in |data|; commits with the commit's name as the message;
    then records the new hash, tags it 'tag_<name>' and, for branch tips,
    points the ref 'branch_<name>' at it.

    Args:
      commit - A GitRepoSchema.COMMIT.
      data - The commit dictionary described in the class docstring, or None.

    Throws an AssertionError if `git commit` fails.
    """
    data = data or {}

    if commit.parents:
      parents = list(commit.parents)
      self.git('checkout', '--detach', '-q', self[parents[0]])
      if len(parents) > 1:
        self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]])
    else:
      self.git('checkout', '--orphan', 'root_%s' % commit.name)
      self.git('rm', '-rf', '.')

    env = self._commit_env(data)

    for fname, file_data in data.items():
      # The special keys carry commit metadata, not file descriptions.
      if any(fname is key for key in self._SPECIAL_KEYS):
        continue
      self._stage_file(fname, file_data)

    rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env)
    assert rslt.retcode == 0, 'Failed to commit %s' % str(commit)
    self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip()
    self.git('tag', 'tag_%s' % commit.name, self[commit.name])
    if commit.is_branch:
      self.git('update-ref', 'branch_%s' % commit.name, self[commit.name])

  def git(self, *args, **kwargs):
    """Runs a git command specified by |args| in this repo.

    Extra keyword arguments are forwarded to subprocess.check_output. stderr is
    discarded.

    Returns:
      A COMMAND_OUTPUT of (retcode, stdout). A non-zero exit status is reported
      through retcode rather than raised.
    """
    assert self.repo_path is not None
    try:
      with open(os.devnull, 'wb') as devnull:
        output = subprocess.check_output(
          ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs)
      return self.COMMAND_OUTPUT(0, output)
    except subprocess.CalledProcessError as e:
      return self.COMMAND_OUTPUT(e.returncode, e.output)

  def nuke(self):
    """Obliterates the git repo on disk.

    Causes this GitRepo to be unusable.
    """
    shutil.rmtree(self.repo_path)
    self.repo_path = None

  def run(self, fn, *args, **kwargs):
    """Run a python function with the given args and kwargs with the cwd set to
    the git repo.

    The previous working directory is restored afterwards, even if |fn| raises.
    Returns whatever |fn| returns.
    """
    assert self.repo_path is not None
    curdir = os.getcwd()
    try:
      os.chdir(self.repo_path)
      return fn(*args, **kwargs)
    finally:
      os.chdir(curdir)
351 | |
352 | |
class GitRepoSchemaTestBase(unittest.TestCase):
  """TestCase base which builds a GitRepoSchema for its subclasses.

  Subclasses set the class variable REPO to a schema string in the format
  documented on GitRepoSchema. The resulting schema object is stored on the
  class as r_schema.

  Content for an individual commit may be supplied through a class variable
  named COMMIT_%(commit_name)s.

  Real tests will most likely derive from GitRepoReadOnlyTestBase or
  GitRepoReadWriteTestBase instead of using this class directly.
  """
  REPO = None

  @classmethod
  def getRepoContent(cls, commit):
    """Returns the class's COMMIT_<commit> attribute, or None if unset."""
    attr_name = 'COMMIT_%s' % commit
    return getattr(cls, attr_name, None)

  @classmethod
  def setUpClass(cls):
    """Builds cls.r_schema from cls.REPO, which must have been set."""
    super(GitRepoSchemaTestBase, cls).setUpClass()
    assert cls.REPO is not None
    schema_text = cls.REPO
    cls.r_schema = GitRepoSchema(schema_text, cls.getRepoContent)
376 | |
377 | |
class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase):
  """TestCase base whose tests all share a single GitRepo.

  The repo is built from the schema and content set up by
  GitRepoSchemaTestBase and is available as self.repo. It is created once
  before the subclass's tests run and deleted once after they finish, so tests
  should not modify it.
  """
  REPO = None

  @classmethod
  def setUpClass(cls):
    """Creates the shared repo after the schema has been built."""
    super(GitRepoReadOnlyTestBase, cls).setUpClass()
    assert cls.REPO is not None
    schema = cls.r_schema
    cls.repo = schema.reify()

  @classmethod
  def tearDownClass(cls):
    """Deletes the shared repo from disk."""
    shared_repo = cls.repo
    shared_repo.nuke()
    super(GitRepoReadOnlyTestBase, cls).tearDownClass()
397 | |
398 | |
class GitRepoReadWriteTestBase(GitRepoSchemaTestBase):
  """TestCase base which gives every test function its own GitRepo.

  The repo is built from the schema and content set up by
  GitRepoSchemaTestBase and is available as self.repo. It is created before
  each test function and deleted after it, so tests are free to modify it.
  """
  REPO = None

  def setUp(self):
    """Creates a fresh repo for the test about to run."""
    super(GitRepoReadWriteTestBase, self).setUp()
    schema = self.r_schema
    self.repo = schema.reify()

  def tearDown(self):
    """Deletes the repo used by the test which just ran."""
    finished_repo = self.repo
    finished_repo.nuke()
    super(GitRepoReadWriteTestBase, self).tearDown()
OLD | NEW |