OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 import atexit |
| 6 import collections |
| 7 import copy |
| 8 import datetime |
| 9 import hashlib |
| 10 import os |
| 11 import shutil |
| 12 import subprocess |
| 13 import tempfile |
| 14 import unittest |
| 15 |
| 16 |
def git_hash_data(data, typ='blob'):
  """Calculate the git-style SHA1 for some data.

  Git hashes the header 'blob <size>' plus a NUL byte, followed by the raw
  content, where <size> is the length of the content in bytes.

  Only supports 'blob' type data at the moment.

  Args:
    data (bytes or text) - The blob content. Text is encoded as UTF-8 before
        hashing so that the size in the header is a byte count.
    typ (str) - The git object type. Must be 'blob'.

  Returns the 40 character hexadecimal digest as a string.

  Throws an AssertionError if typ is not 'blob'.
  """
  assert typ == 'blob', 'Only support blobs for now'
  if not isinstance(data, bytes):
    data = data.encode('utf-8')
  # Build the header as bytes: hashlib only accepts bytes-like input.
  header = ('blob %d\0' % len(data)).encode('ascii')
  return hashlib.sha1(header + data).hexdigest()
| 24 |
| 25 |
# The ABCs live in collections.abc on Python 3 (the aliases on the collections
# module itself were removed in 3.10) and directly in collections on Python 2.
class OrderedSet(getattr(collections, 'abc', collections).MutableSet):
  """A mutable set which remembers the order in which keys were added.

  Adapted from http://code.activestate.com/recipes/576694/

  Keys are kept in a circular doubly linked list of [key, prev, next] nodes
  anchored at the sentinel node self.end; self.data maps each key to its node
  so that membership tests, insertion and removal do not scan the list.

  Comparison with another OrderedSet is order-sensitive; comparison with any
  other iterable ignores order.
  """
  # Instances are mutable and compare by content, so they must not be hashable.
  __hash__ = None

  def __init__(self, iterable=None):
    """Builds an OrderedSet, optionally filled from |iterable| in order."""
    self.end = end = []
    end += [None, end, end]  # sentinel node for doubly linked list
    self.data = {}  # key --> [key, prev, next]
    if iterable is not None:
      self |= iterable

  def __contains__(self, key):
    return key in self.data

  def __eq__(self, other):
    if isinstance(other, OrderedSet):
      return len(self) == len(other) and list(self) == list(other)
    return set(self) == set(other)

  def __ne__(self, other):
    # Python 2 does not derive __ne__ from __eq__, so spell it out.
    return not self == other

  def __len__(self):
    return len(self.data)

  def __iter__(self):
    """Yields the keys from oldest to newest."""
    end = self.end
    curr = end[2]
    while curr is not end:
      yield curr[0]
      curr = curr[2]

  def __repr__(self):
    if not self:
      return '%s()' % (self.__class__.__name__,)
    return '%s(%r)' % (self.__class__.__name__, list(self))

  def __reversed__(self):
    """Yields the keys from newest to oldest."""
    end = self.end
    curr = end[1]
    while curr is not end:
      yield curr[0]
      curr = curr[1]

  def add(self, key):
    """Appends |key| at the end; does nothing if it is already present."""
    if key not in self.data:
      end = self.end
      last = end[1]
      node = [key, last, end]
      # Splice the new node in between the current last node and the sentinel.
      last[2] = node
      end[1] = node
      self.data[key] = node

  def difference_update(self, *others):
    """Removes every key found in any of the iterables in |others|."""
    for other in others:
      for key in other:
        self.discard(key)

  def discard(self, key):
    """Removes |key| if present; does nothing otherwise."""
    if key in self.data:
      _, prev, nxt = self.data.pop(key)
      # Unlink the node by pointing its neighbours at each other.
      prev[2] = nxt
      nxt[1] = prev

  def pop(self, last=True):  # pylint: disable=W0221
    """Removes and returns the newest key, or the oldest if |last| is false.

    Throws a KeyError if the set is empty.
    """
    if not self:
      raise KeyError('set is empty')
    key = self.end[1][0] if last else self.end[2][0]
    self.discard(key)
    return key
| 93 |
| 94 |
class GitRepoSchema(object):
  r"""A declarative git testing repo.

  Pass a schema to __init__ in the form of:
    A B C D
    B E D

  This is the repo

     A - B - C - D
          \ E /

  Whitespace doesn't matter. Each line is a declaration of which commits come
  before which other commits.

  Every commit gets a tag 'tag_%(commit)s'
  Every unique terminal commit gets a branch 'branch_%(commit)s'
  Last commit in First line is the branch 'master'
  Root commits get a ref 'root_%(commit)s'

  Timestamps are in topo order, earlier commits (as indicated by their presence
  in the schema) get earlier timestamps. Stamps start at the Unix Epoch, and
  increment by 1 day each.
  """
  COMMIT = collections.namedtuple('COMMIT', 'name parents is_branch is_root')

  def __init__(self, repo_schema='',
               content_fn=lambda v: {v: {'data': v}}):
    """Builds a new GitRepoSchema.

    Args:
      repo_schema (str) - Initial schema for this repo. See class docstring for
          info on the schema format.
      content_fn ((commit_name) -> commit_data) - A function which will be
          lazily called to obtain data for each commit. The results of this
          function are cached (i.e. it will never be called twice for the same
          commit_name). See the docstring on the GitRepo class for the format of
          the data returned by this function.
    """
    self.master = None
    self.par_map = {}  # commit name --> collection of its parents' names
    self.data_cache = {}
    self.content_fn = content_fn
    self.add_commits(repo_schema)

  def walk(self):
    """(Generator) Walks the repo schema from roots to tips.

    Generates GitRepoSchema.COMMIT objects for each commit. Commits are
    produced in rounds: each round yields, in sorted name order, every commit
    whose parents have all been produced already. Only the commits of the first
    round are flagged is_root; a commit is flagged is_branch when no other
    commit lists it as a parent.

    Throws an AssertionError if it detects a cycle.
    """
    is_root = True
    # Work on a copy: parents are struck out of it as they are emitted.
    pending = copy.deepcopy(self.par_map)
    while pending:
      ready = set(k for k, v in pending.items() if not v)
      assert ready, 'Cycle detected! %s' % pending

      for name in sorted(ready):
        is_branch = not any(name in v for v in self.par_map.values())
        yield self.COMMIT(name, self.par_map[name], is_branch, is_root)
        del pending[name]
      for parents in pending.values():
        parents.difference_update(ready)
      is_root = False

  def add_commits(self, schema):
    """Adds more commits from a schema into the existing Schema.

    The last commit on the first non-blank line ever added becomes
    self.master.

    Args:
      schema (str) - See class docstring for info on schema format.

    Throws an AssertionError if it detects a cycle.
    """
    for commits in (l.split() for l in schema.splitlines() if l.strip()):
      parent = None
      for commit in commits:
        if commit not in self.par_map:
          self.par_map[commit] = OrderedSet()
        if parent is not None:
          self.par_map[commit].add(parent)
        parent = commit
      if parent and not self.master:
        self.master = parent
    for _ in self.walk():  # This will throw if there are any cycles.
      pass

  def reify(self):
    """Returns a real GitRepo for this GitRepoSchema"""
    return GitRepo(self)

  def data_for(self, commit):
    """Obtains the data for |commit|.

    See the docstring on the GitRepo class for the format of the returned data.

    Caches the result on this GitRepoSchema instance.
    """
    if commit not in self.data_cache:
      self.data_cache[commit] = self.content_fn(commit)
    return self.data_cache[commit]
| 197 |
| 198 |
class GitRepo(object):
  """Creates a real git repo for a GitRepoSchema.

  Obtains schema and content information from the GitRepoSchema.

  The format for the commit data supplied by GitRepoSchema.data_for is:
    {
      SPECIAL_KEY: special_value,
      ...
      "path/to/some/file": { 'data': "some data content for this file",
                             'mode': 0o755 },
      ...
    }

  The SPECIAL_KEYs are the following attributes of the GitRepo class:
    * AUTHOR_NAME
    * AUTHOR_EMAIL
    * AUTHOR_DATE - must be a datetime.datetime instance
    * COMMITTER_NAME
    * COMMITTER_EMAIL
    * COMMITTER_DATE - must be a datetime.datetime instance

  For file content, if 'data' is None, then this commit will `git rm` that file.
  An entry with a 'mode' but no 'data' only changes the mode of a file which is
  already present in the working tree.
  """
  BASE_TEMP_DIR = tempfile.mkdtemp(suffix='base', prefix='git_repo')
  # ignore_errors: the directory may already have been removed by exit time.
  atexit.register(shutil.rmtree, BASE_TEMP_DIR, ignore_errors=True)

  # Singleton objects to specify specific data in a commit dictionary.
  AUTHOR_NAME = object()
  AUTHOR_EMAIL = object()
  AUTHOR_DATE = object()
  COMMITTER_NAME = object()
  COMMITTER_EMAIL = object()
  COMMITTER_DATE = object()

  # Keys of a commit dictionary which are metadata rather than file names.
  _SPECIAL_KEYS = frozenset((AUTHOR_NAME, AUTHOR_EMAIL, AUTHOR_DATE,
                             COMMITTER_NAME, COMMITTER_EMAIL, COMMITTER_DATE))

  DEFAULT_AUTHOR_NAME = 'Author McAuthorly'
  DEFAULT_AUTHOR_EMAIL = 'author@example.com'
  DEFAULT_COMMITTER_NAME = 'Charles Committish'
  DEFAULT_COMMITTER_EMAIL = 'commitish@example.com'

  COMMAND_OUTPUT = collections.namedtuple('COMMAND_OUTPUT', 'retcode stdout')

  def __init__(self, schema):
    """Makes new GitRepo.

    Automatically creates a temp folder under GitRepo.BASE_TEMP_DIR. It's
    recommended that you clean this repo up by calling nuke() on it, but if not,
    GitRepo will automatically clean up all allocated repos at the exit of the
    program (assuming a normal exit like with sys.exit)

    Args:
      schema - An instance of GitRepoSchema
    """
    self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR)
    self.commit_map = {}  # schema commit name --> commit hash
    self._date = datetime.datetime(1970, 1, 1)

    self.git('init')
    for commit in schema.walk():
      self._add_schema_commit(commit, schema.data_for(commit.name))
    if schema.master:
      # update-ref needs the full ref name to create an actual branch.
      self.git('update-ref', 'refs/heads/master', self[schema.master])

  def __getitem__(self, commit_name):
    """Gets the hash of a commit by its schema name.

    >>> r = GitRepo(GitRepoSchema('A B C'))
    >>> r['B']
    '7381febe1da03b09da47f009963ab7998a974935'
    """
    return self.commit_map[commit_name]

  def _commit_env(self, data):
    """Returns the environment to run `git commit` with for commit |data|.

    Starts from a copy of the current process environment so that git can still
    be located and configured, then sets the six GIT_{AUTHOR,COMMITTER}_* vars.
    A value comes from |data| when the matching special key is present.
    Otherwise a name or email falls back to the DEFAULT_* class attribute, and
    a date takes self._date, which is then advanced by one day.
    """
    env = os.environ.copy()
    for prefix in ('AUTHOR', 'COMMITTER'):
      for suffix in ('NAME', 'EMAIL', 'DATE'):
        singleton = '%s_%s' % (prefix, suffix)
        key = getattr(self, singleton)
        if key in data:
          val = data[key]
        elif suffix == 'DATE':
          val = self._date
          self._date += datetime.timedelta(days=1)
        else:
          val = getattr(self, 'DEFAULT_%s' % singleton)
        env['GIT_%s' % singleton] = str(val)
    return env

  def _stage_file(self, fname, file_data):
    """Writes, chmods or removes |fname| as |file_data| asks and stages it."""
    path = os.path.join(self.repo_path, fname)
    if 'data' in file_data:
      content = file_data['data']
      if content is None:
        # `git rm` already stages the deletion; there is nothing left to add.
        self.git('rm', fname)
        return
      pardir = os.path.dirname(path)
      if not os.path.exists(pardir):
        os.makedirs(pardir)
      if not isinstance(content, bytes):
        # The file is opened in binary mode, which only accepts bytes.
        content = content.encode('utf-8')
      with open(path, 'wb') as f:
        f.write(content)

    mode = file_data.get('mode')
    if mode:
      os.chmod(path, mode)

    self.git('add', fname)

  def _add_schema_commit(self, commit, data):
    """Creates the real commit for the schema commit |commit|.

    Checks out the first parent (merging in any others without committing), or
    starts an emptied orphan branch 'root_<name>' for a commit without parents.
    Then applies the files in |data|, commits with the schema name as message,
    records the hash in self.commit_map and tags it 'tag_<name>'. Commits
    flagged is_branch also get a branch 'branch_<name>'.

    Args:
      commit - A GitRepoSchema.COMMIT.
      data - The commit dictionary described in the class docstring, or None.

    Throws an AssertionError if `git commit` fails.
    """
    data = data or {}

    if commit.parents:
      parents = list(commit.parents)
      self.git('checkout', '--detach', '-q', self[parents[0]])
      if len(parents) > 1:
        self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]])
    else:
      self.git('checkout', '--orphan', 'root_%s' % commit.name)
      self.git('rm', '-rf', '.')

    env = self._commit_env(data)

    for fname, file_data in data.items():
      if fname in self._SPECIAL_KEYS:
        continue  # Commit metadata, already consumed by _commit_env.
      self._stage_file(fname, file_data)

    rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env)
    assert rslt.retcode == 0, 'Failed to commit %s' % str(commit)
    self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip()
    self.git('tag', 'tag_%s' % commit.name, self[commit.name])
    if commit.is_branch:
      self.git('update-ref', 'refs/heads/branch_%s' % commit.name,
               self[commit.name])

  def git(self, *args, **kwargs):
    """Runs a git command specified by |args| in this repo.

    Extra |kwargs| are passed through to subprocess.check_output. stderr is
    discarded.

    Returns a COMMAND_OUTPUT with the exit code and captured stdout. A non-zero
    exit is reported through retcode rather than raised.
    """
    assert self.repo_path is not None
    try:
      with open(os.devnull, 'wb') as devnull:
        output = subprocess.check_output(
            ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs)
      return self.COMMAND_OUTPUT(0, output)
    except subprocess.CalledProcessError as e:
      return self.COMMAND_OUTPUT(e.returncode, e.output)

  def nuke(self):
    """Obliterates the git repo on disk.

    Causes this GitRepo to be unusable. Calling it again is a no-op.
    """
    if self.repo_path is None:
      return
    shutil.rmtree(self.repo_path)
    self.repo_path = None

  def run(self, fn, *args, **kwargs):
    """Run a python function with the given args and kwargs with the cwd set to
    the git repo.

    The previous working directory is restored afterwards, even if |fn| raises.
    Returns whatever |fn| returns.
    """
    assert self.repo_path is not None
    curdir = os.getcwd()
    try:
      os.chdir(self.repo_path)
      return fn(*args, **kwargs)
    finally:
      os.chdir(curdir)
| 355 |
| 356 |
class GitRepoSchemaTestBase(unittest.TestCase):
  """TestCase base which builds a GitRepoSchema once per test class.

  Subclasses set the class variable REPO to a schema string in the format
  GitRepoSchema accepts; the resulting schema is stored as cls.r_schema.

  Content for a commit is supplied by defining a class variable named
  COMMIT_%(commit_name)s. Commits without such a variable get None.

  Real tests usually derive from GitRepoReadOnlyTestBase or
  GitRepoReadWriteTestBase instead of using this class directly.
  """
  REPO = None

  @classmethod
  def getRepoContent(cls, commit):
    """Returns the COMMIT_<commit> class variable, or None if not defined."""
    attr_name = 'COMMIT_%s' % commit
    try:
      return getattr(cls, attr_name)
    except AttributeError:
      return None

  @classmethod
  def setUpClass(cls):
    """Builds cls.r_schema from cls.REPO, which must have been set."""
    super(GitRepoSchemaTestBase, cls).setUpClass()
    assert cls.REPO is not None
    schema = GitRepoSchema(cls.REPO, cls.getRepoContent)
    cls.r_schema = schema
| 380 |
| 381 |
class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase):
  """Shares a single GitRepo between all the tests of a subclass.

  The repo is built from the schema and content of GitRepoSchemaTestBase when
  the test class is set up, is reachable as self.repo, and is nuked when the
  test class is torn down. Because it is shared, tests should only read it.
  """
  REPO = None

  @classmethod
  def setUpClass(cls):
    """Reifies the class schema into cls.repo."""
    super(GitRepoReadOnlyTestBase, cls).setUpClass()
    assert cls.REPO is not None
    schema = cls.r_schema
    cls.repo = schema.reify()

  @classmethod
  def tearDownClass(cls):
    """Removes the shared repo from disk."""
    shared_repo = cls.repo
    shared_repo.nuke()
    super(GitRepoReadOnlyTestBase, cls).tearDownClass()
| 401 |
| 402 |
class GitRepoReadWriteTestBase(GitRepoSchemaTestBase):
  """Gives every test function of a subclass its own fresh GitRepo.

  The repo is built from the schema and content of GitRepoSchemaTestBase in
  setUp, is reachable as self.repo, and is nuked again in tearDown, so tests
  are free to modify it.
  """
  REPO = None

  def setUp(self):
    """Reifies the class schema into a new self.repo."""
    super(GitRepoReadWriteTestBase, self).setUp()
    schema = self.r_schema
    self.repo = schema.reify()

  def tearDown(self):
    """Removes this test's repo from disk."""
    test_repo = self.repo
    test_repo.nuke()
    super(GitRepoReadWriteTestBase, self).tearDown()
OLD | NEW |