| OLD | NEW |
| --- | --- |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import atexit | 5 import atexit |
| 6 import collections | 6 import collections |
| 7 import copy | 7 import copy |
| 8 import datetime | 8 import datetime |
| 9 import hashlib | 9 import hashlib |
| 10 import os | 10 import os |
| 11 import shutil | 11 import shutil |
| 12 import subprocess | 12 import subprocess |
| 13 import sys |
| 13 import tempfile | 14 import tempfile |
| 14 import unittest | 15 import unittest |
| 15 | 16 |
| 17 from cStringIO import StringIO |
| 18 |
| 16 | 19 |
| 17 def git_hash_data(data, typ='blob'): | 20 def git_hash_data(data, typ='blob'): |
| 18 """Calculate the git-style SHA1 for some data. | 21 """Calculate the git-style SHA1 for some data. |
| 19 | 22 |
| 20 Only supports 'blob' type data at the moment. | 23 Only supports 'blob' type data at the moment. |
| 21 """ | 24 """ |
| 22 assert typ == 'blob', 'Only support blobs for now' | 25 assert typ == 'blob', 'Only support blobs for now' |
| 23 return hashlib.sha1('blob %s\0%s' % (len(data), data)).hexdigest() | 26 return hashlib.sha1('blob %s\0%s' % (len(data), data)).hexdigest() |
| 24 | 27 |
| 25 | 28 |
| (...skipping 126 matching lines...) | |
| 152 | 155 |
| 153 for k in sorted(empty_keys): | 156 for k in sorted(empty_keys): |
| 154 yield self.COMMIT(k, self.par_map[k], | 157 yield self.COMMIT(k, self.par_map[k], |
| 155 not any(k in v for v in self.par_map.itervalues()), | 158 not any(k in v for v in self.par_map.itervalues()), |
| 156 is_root) | 159 is_root) |
| 157 del par_map[k] | 160 del par_map[k] |
| 158 for v in par_map.itervalues(): | 161 for v in par_map.itervalues(): |
| 159 v.difference_update(empty_keys) | 162 v.difference_update(empty_keys) |
| 160 is_root = False | 163 is_root = False |
| 161 | 164 |
| 165 def add_partial(self, commit, parent=None): |
| 166 if commit not in self.par_map: |
| 167 self.par_map[commit] = OrderedSet() |
| 168 if parent is not None: |
| 169 self.par_map[commit].add(parent) |
| 170 |
| 162 def add_commits(self, schema): | 171 def add_commits(self, schema): |
| 163 """Adds more commits from a schema into the existing Schema. | 172 """Adds more commits from a schema into the existing Schema. |
| 164 | 173 |
| 165 Args: | 174 Args: |
| 166 schema (str) - See class docstring for info on schema format. | 175 schema (str) - See class docstring for info on schema format. |
| 167 | 176 |
| 168 Throws an AssertionError if it detects a cycle. | 177 Throws an AssertionError if it detects a cycle. |
| 169 """ | 178 """ |
| 170 for commits in (l.split() for l in schema.splitlines() if l.strip()): | 179 for commits in (l.split() for l in schema.splitlines() if l.strip()): |
| 171 parent = None | 180 parent = None |
| 172 for commit in commits: | 181 for commit in commits: |
| 173 if commit not in self.par_map: | 182 self.add_partial(commit, parent) |
| 174 self.par_map[commit] = OrderedSet() | |
| 175 if parent is not None: | |
| 176 self.par_map[commit].add(parent) | |
| 177 parent = commit | 183 parent = commit |
| 178 if parent and not self.master: | 184 if parent and not self.master: |
| 179 self.master = parent | 185 self.master = parent |
| 180 for _ in self.walk(): # This will throw if there are any cycles. | 186 for _ in self.walk(): # This will throw if there are any cycles. |
| 181 pass | 187 pass |
| 182 | 188 |
| 183 def reify(self): | 189 def reify(self): |
| 184 """Returns a real GitRepo for this GitRepoSchema""" | 190 """Returns a real GitRepo for this GitRepoSchema""" |
| 185 return GitRepo(self) | 191 return GitRepo(self) |
| 186 | 192 |
| 187 def data_for(self, commit): | 193 def data_for(self, commit): |
| 188 """Obtains the data for |commit|. | 194 """Obtains the data for |commit|. |
| 189 | 195 |
| 190 See the docstring on the GitRepo class for the format of the returned data. | 196 See the docstring on the GitRepo class for the format of the returned data. |
| 191 | 197 |
| 192 Caches the result on this GitRepoSchema instance. | 198 Caches the result on this GitRepoSchema instance. |
| 193 """ | 199 """ |
| 194 if commit not in self.data_cache: | 200 if commit not in self.data_cache: |
| 195 self.data_cache[commit] = self.content_fn(commit) | 201 self.data_cache[commit] = self.content_fn(commit) |
| 196 return self.data_cache[commit] | 202 return self.data_cache[commit] |
| 197 | 203 |
| 204 def simple_graph(self): |
| 205 """Returns a dictionary of {commit_subject: {parent commit_subjects}} |
| 206 |
| 207 This allows you to get a very simple connection graph over the whole repo |
| 208 for comparison purposes. Only commit subjects (not ids, not content/data) |
| 209 are considered. |
| 210 """ |
| 211 ret = {} |
| 212 for commit in self.walk(): |
| 213 ret.setdefault(commit.name, set()).update(commit.parents) |
| 214 return ret |
| 215 |
| 198 | 216 |
| 199 class GitRepo(object): | 217 class GitRepo(object): |
| 200 """Creates a real git repo for a GitRepoSchema. | 218 """Creates a real git repo for a GitRepoSchema. |
| 201 | 219 |
| 202 Obtains schema and content information from the GitRepoSchema. | 220 Obtains schema and content information from the GitRepoSchema. |
| 203 | 221 |
| 204 The format for the commit data supplied by GitRepoSchema.data_for is: | 222 The format for the commit data supplied by GitRepoSchema.data_for is: |
| 205 { | 223 { |
| 206 SPECIAL_KEY: special_value, | 224 SPECIAL_KEY: special_value, |
| 207 ... | 225 ... |
| (...skipping 38 matching lines...) | |
| 246 GitRepo will automatically clean up all allocated repos at the exit of the | 264 GitRepo will automatically clean up all allocated repos at the exit of the |
| 247 program (assuming a normal exit like with sys.exit) | 265 program (assuming a normal exit like with sys.exit) |
| 248 | 266 |
| 249 Args: | 267 Args: |
| 250 schema - An instance of GitRepoSchema | 268 schema - An instance of GitRepoSchema |
| 251 """ | 269 """ |
| 252 self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR) | 270 self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR) |
| 253 self.commit_map = {} | 271 self.commit_map = {} |
| 254 self._date = datetime.datetime(1970, 1, 1) | 272 self._date = datetime.datetime(1970, 1, 1) |
| 255 | 273 |
| 274 self.to_schema_refs = ['--branches'] |
| 275 |
| 256 self.git('init') | 276 self.git('init') |
| 277 self.git('config', 'user.name', 'testcase') |
| 278 self.git('config', 'user.email', 'testcase@example.com') |
| 257 for commit in schema.walk(): | 279 for commit in schema.walk(): |
| 258 self._add_schema_commit(commit, schema.data_for(commit.name)) | 280 self._add_schema_commit(commit, schema.data_for(commit.name)) |
| 259 self.last_commit = self[commit.name] | 281 self.last_commit = self[commit.name] |
| 260 if schema.master: | 282 if schema.master: |
| 261 self.git('update-ref', 'master', self[schema.master]) | 283 self.git('update-ref', 'refs/heads/master', self[schema.master]) |
| 262 | 284 |
| 263 def __getitem__(self, commit_name): | 285 def __getitem__(self, commit_name): |
| 264 """Gets the hash of a commit by its schema name. | 286 """Gets the hash of a commit by its schema name. |
| 265 | 287 |
| 266 >>> r = GitRepo(GitRepoSchema('A B C')) | 288 >>> r = GitRepo(GitRepoSchema('A B C')) |
| 267 >>> r['B'] | 289 >>> r['B'] |
| 268 '7381febe1da03b09da47f009963ab7998a974935' | 290 '7381febe1da03b09da47f009963ab7998a974935' |
| 269 """ | 291 """ |
| 270 return self.commit_map[commit_name] | 292 return self.commit_map[commit_name] |
| 271 | 293 |
| 272 def _add_schema_commit(self, commit, data): | 294 def _add_schema_commit(self, commit, commit_data): |
| 273 data = data or {} | 295 commit_data = commit_data or {} |
| 274 | 296 |
| 275 if commit.parents: | 297 if commit.parents: |
| 276 parents = list(commit.parents) | 298 parents = list(commit.parents) |
| 277 self.git('checkout', '--detach', '-q', self[parents[0]]) | 299 self.git('checkout', '--detach', '-q', self[parents[0]]) |
| 278 if len(parents) > 1: | 300 if len(parents) > 1: |
| 279 self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]]) | 301 self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]]) |
| 280 else: | 302 else: |
| 281 self.git('checkout', '--orphan', 'root_%s' % commit.name) | 303 self.git('checkout', '--orphan', 'root_%s' % commit.name) |
| 282 self.git('rm', '-rf', '.') | 304 self.git('rm', '-rf', '.') |
| 283 | 305 |
| 284 env = {} | 306 env = self.get_git_commit_env(commit_data) |
| 285 for prefix in ('AUTHOR', 'COMMITTER'): | |
| 286 for suffix in ('NAME', 'EMAIL', 'DATE'): | |
| 287 singleton = '%s_%s' % (prefix, suffix) | |
| 288 key = getattr(self, singleton) | |
| 289 if key in data: | |
| 290 val = data[key] | |
| 291 else: | |
| 292 if suffix == 'DATE': | |
| 293 val = self._date | |
| 294 self._date += datetime.timedelta(days=1) | |
| 295 else: | |
| 296 val = getattr(self, 'DEFAULT_%s' % singleton) | |
| 297 env['GIT_%s' % singleton] = str(val) | |
| 298 | 307 |
| 299 for fname, file_data in data.iteritems(): | 308 for fname, file_data in commit_data.iteritems(): |
| 300 deleted = False | 309 deleted = False |
| 301 if 'data' in file_data: | 310 if 'data' in file_data: |
| 302 data = file_data.get('data') | 311 data = file_data.get('data') |
| 303 if data is None: | 312 if data is None: |
| 304 deleted = True | 313 deleted = True |
| 305 self.git('rm', fname) | 314 self.git('rm', fname) |
| 306 else: | 315 else: |
| 307 path = os.path.join(self.repo_path, fname) | 316 path = os.path.join(self.repo_path, fname) |
| 308 pardir = os.path.dirname(path) | 317 pardir = os.path.dirname(path) |
| 309 if not os.path.exists(pardir): | 318 if not os.path.exists(pardir): |
| 310 os.makedirs(pardir) | 319 os.makedirs(pardir) |
| 311 with open(path, 'wb') as f: | 320 with open(path, 'wb') as f: |
| 312 f.write(data) | 321 f.write(data) |
| 313 | 322 |
| 314 mode = file_data.get('mode') | 323 mode = file_data.get('mode') |
| 315 if mode and not deleted: | 324 if mode and not deleted: |
| 316 os.chmod(path, mode) | 325 os.chmod(path, mode) |
| 317 | 326 |
| 318 self.git('add', fname) | 327 self.git('add', fname) |
| 319 | 328 |
| 320 rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env) | 329 rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env) |
| 321 assert rslt.retcode == 0, 'Failed to commit %s' % str(commit) | 330 assert rslt.retcode == 0, 'Failed to commit %s' % str(commit) |
| 322 self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip() | 331 self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip() |
| 323 self.git('tag', 'tag_%s' % commit.name, self[commit.name]) | 332 self.git('tag', 'tag_%s' % commit.name, self[commit.name]) |
| 324 if commit.is_branch: | 333 if commit.is_branch: |
| 325 self.git('branch', '-f', 'branch_%s' % commit.name, self[commit.name]) | 334 self.git('branch', '-f', 'branch_%s' % commit.name, self[commit.name]) |
| 326 | 335 |
| 336 def get_git_commit_env(self, commit_data=None): |
| 337 commit_data = commit_data or {} |
| 338 env = {} |
| 339 for prefix in ('AUTHOR', 'COMMITTER'): |
| 340 for suffix in ('NAME', 'EMAIL', 'DATE'): |
| 341 singleton = '%s_%s' % (prefix, suffix) |
| 342 key = getattr(self, singleton) |
| 343 if key in commit_data: |
| 344 val = commit_data[key] |
| 345 else: |
| 346 if suffix == 'DATE': |
| 347 val = self._date |
| 348 self._date += datetime.timedelta(days=1) |
| 349 else: |
| 350 val = getattr(self, 'DEFAULT_%s' % singleton) |
| 351 env['GIT_%s' % singleton] = str(val) |
| 352 return env |
| 353 |
| 354 |
| 327 def git(self, *args, **kwargs): | 355 def git(self, *args, **kwargs): |
| 328 """Runs a git command specified by |args| in this repo.""" | 356 """Runs a git command specified by |args| in this repo.""" |
| 329 assert self.repo_path is not None | 357 assert self.repo_path is not None |
| 330 try: | 358 try: |
| 331 with open(os.devnull, 'wb') as devnull: | 359 with open(os.devnull, 'wb') as devnull: |
| 332 output = subprocess.check_output( | 360 output = subprocess.check_output( |
| 333 ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs) | 361 ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs) |
| 334 return self.COMMAND_OUTPUT(0, output) | 362 return self.COMMAND_OUTPUT(0, output) |
| 335 except subprocess.CalledProcessError as e: | 363 except subprocess.CalledProcessError as e: |
| 336 return self.COMMAND_OUTPUT(e.returncode, e.output) | 364 return self.COMMAND_OUTPUT(e.returncode, e.output) |
| 337 | 365 |
| 366 def git_commit(self, message): |
| 367 return self.git('commit', '-am', message, env=self.get_git_commit_env()) |
| 368 |
| 338 def nuke(self): | 369 def nuke(self): |
| 339 """Obliterates the git repo on disk. | 370 """Obliterates the git repo on disk. |
| 340 | 371 |
| 341 Causes this GitRepo to be unusable. | 372 Causes this GitRepo to be unusable. |
| 342 """ | 373 """ |
| 343 shutil.rmtree(self.repo_path) | 374 shutil.rmtree(self.repo_path) |
| 344 self.repo_path = None | 375 self.repo_path = None |
| 345 | 376 |
| 346 def run(self, fn, *args, **kwargs): | 377 def run(self, fn, *args, **kwargs): |
| 347 """Run a python function with the given args and kwargs with the cwd set to | 378 """Run a python function with the given args and kwargs with the cwd set to |
| 348 the git repo.""" | 379 the git repo.""" |
| 349 assert self.repo_path is not None | 380 assert self.repo_path is not None |
| 350 curdir = os.getcwd() | 381 curdir = os.getcwd() |
| 351 try: | 382 try: |
| 352 os.chdir(self.repo_path) | 383 os.chdir(self.repo_path) |
| 353 return fn(*args, **kwargs) | 384 return fn(*args, **kwargs) |
| 354 finally: | 385 finally: |
| 355 os.chdir(curdir) | 386 os.chdir(curdir) |
| 356 | 387 |
| 388 def capture_stdio(self, fn, *args, **kwargs): |
| 389 """Run a python function with the given args and kwargs with the cwd set to |
| 390 the git repo. |
| 391 |
| 392 Returns the (stdout, stderr) of whatever ran, instead of what |fn| |
| 393 returned. |
| 394 """ |
| 395 stdout = sys.stdout |
| 396 stderr = sys.stderr |
| 397 try: |
| 398 sys.stdout = StringIO() |
| 399 sys.stderr = StringIO() |
| 400 try: |
| 401 self.run(fn, *args, **kwargs) |
| 402 except SystemExit: |
| 403 pass |
| 404 return sys.stdout.getvalue(), sys.stderr.getvalue() |
| 405 finally: |
| 406 sys.stdout = stdout |
| 407 sys.stderr = stderr |
| 408 |
| 409 def open(self, path, mode='rb'): |
| 410 return open(os.path.join(self.repo_path, path), mode) |
| 411 |
| 412 def to_schema(self): |
| 413 lines = self.git('rev-list', '--parents', '--reverse', '--topo-order', |
| 414 '--format=%s', *self.to_schema_refs).stdout.splitlines() |
| 415 hash_to_msg = {} |
| 416 ret = GitRepoSchema() |
| 417 current = None |
| 418 parents = [] |
| 419 for line in lines: |
| 420 if line.startswith('commit'): |
| 421 assert current is None |
| 422 tokens = line.split() |
| 423 current, parents = tokens[1], tokens[2:] |
| 424 assert all(p in hash_to_msg for p in parents) |
| 425 else: |
| 426 assert current is not None |
| 427 hash_to_msg[current] = line |
| 428 ret.add_partial(line) |
| 429 for parent in parents: |
| 430 ret.add_partial(line, hash_to_msg[parent]) |
| 431 current = None |
| 432 parents = [] |
| 433 assert current is None |
| 434 return ret |
| 435 |
| 357 | 436 |
| 358 class GitRepoSchemaTestBase(unittest.TestCase): | 437 class GitRepoSchemaTestBase(unittest.TestCase): |
| 359 """A TestCase with a built-in GitRepoSchema. | 438 """A TestCase with a built-in GitRepoSchema. |
| 360 | 439 |
| 361 Expects a class variable REPO to be a GitRepoSchema string in the form | 440 Expects a class variable REPO_SCHEMA to be a GitRepoSchema string in the form |
| 362 described by that class. | 441 described by that class. |
| 363 | 442 |
| 364 You may also set class variables in the form COMMIT_%(commit_name)s, which | 443 You may also set class variables in the form COMMIT_%(commit_name)s, which |
| 365 provide the content for the given commit_name commits. | 444 provide the content for the given commit_name commits. |
| 366 | 445 |
| 367 You probably will end up using either GitRepoReadOnlyTestBase or | 446 You probably will end up using either GitRepoReadOnlyTestBase or |
| 368 GitRepoReadWriteTestBase for real tests. | 447 GitRepoReadWriteTestBase for real tests. |
| 369 """ | 448 """ |
| 370 REPO = None | 449 REPO_SCHEMA = None |
| 371 | 450 |
| 372 @classmethod | 451 @classmethod |
| 373 def getRepoContent(cls, commit): | 452 def getRepoContent(cls, commit): |
| 374 return getattr(cls, 'COMMIT_%s' % commit, None) | 453 return getattr(cls, 'COMMIT_%s' % commit, None) |
| 375 | 454 |
| 376 @classmethod | 455 @classmethod |
| 377 def setUpClass(cls): | 456 def setUpClass(cls): |
| 378 super(GitRepoSchemaTestBase, cls).setUpClass() | 457 super(GitRepoSchemaTestBase, cls).setUpClass() |
| 379 assert cls.REPO is not None | 458 assert cls.REPO_SCHEMA is not None |
| 380 cls.r_schema = GitRepoSchema(cls.REPO, cls.getRepoContent) | 459 cls.r_schema = GitRepoSchema(cls.REPO_SCHEMA, cls.getRepoContent) |
| 381 | 460 |
| 382 | 461 |
| 383 class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase): | 462 class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase): |
| 384 """Injects a GitRepo object given the schema and content from | 463 """Injects a GitRepo object given the schema and content from |
| 385 GitRepoSchemaTestBase into TestCase classes which subclass this. | 464 GitRepoSchemaTestBase into TestCase classes which subclass this. |
| 386 | 465 |
| 387 This GitRepo will appear as self.repo, and will be deleted and recreated once | 466 This GitRepo will appear as self.repo, and will be deleted and recreated once |
| 388 for the duration of all the tests in the subclass. | 467 for the duration of all the tests in the subclass. |
| 389 """ | 468 """ |
| 390 REPO = None | 469 REPO_SCHEMA = None |
| 391 | 470 |
| 392 @classmethod | 471 @classmethod |
| 393 def setUpClass(cls): | 472 def setUpClass(cls): |
| 394 super(GitRepoReadOnlyTestBase, cls).setUpClass() | 473 super(GitRepoReadOnlyTestBase, cls).setUpClass() |
| 395 assert cls.REPO is not None | 474 assert cls.REPO_SCHEMA is not None |
| 396 cls.repo = cls.r_schema.reify() | 475 cls.repo = cls.r_schema.reify() |
| 397 | 476 |
| 398 def setUp(self): | 477 def setUp(self): |
| 399 self.repo.git('checkout', '-f', self.repo.last_commit) | 478 self.repo.git('checkout', '-f', self.repo.last_commit) |
| 400 | 479 |
| 401 @classmethod | 480 @classmethod |
| 402 def tearDownClass(cls): | 481 def tearDownClass(cls): |
| 403 cls.repo.nuke() | 482 cls.repo.nuke() |
| 404 super(GitRepoReadOnlyTestBase, cls).tearDownClass() | 483 super(GitRepoReadOnlyTestBase, cls).tearDownClass() |
| 405 | 484 |
| 406 | 485 |
| 407 class GitRepoReadWriteTestBase(GitRepoSchemaTestBase): | 486 class GitRepoReadWriteTestBase(GitRepoSchemaTestBase): |
| 408 """Injects a GitRepo object given the schema and content from | 487 """Injects a GitRepo object given the schema and content from |
| 409 GitRepoSchemaTestBase into TestCase classes which subclass this. | 488 GitRepoSchemaTestBase into TestCase classes which subclass this. |
| 410 | 489 |
| 411 This GitRepo will appear as self.repo, and will be deleted and recreated for | 490 This GitRepo will appear as self.repo, and will be deleted and recreated for |
| 412 each test function in the subclass. | 491 each test function in the subclass. |
| 413 """ | 492 """ |
| 414 REPO = None | 493 REPO_SCHEMA = None |
| 415 | 494 |
| 416 def setUp(self): | 495 def setUp(self): |
| 417 super(GitRepoReadWriteTestBase, self).setUp() | 496 super(GitRepoReadWriteTestBase, self).setUp() |
| 418 self.repo = self.r_schema.reify() | 497 self.repo = self.r_schema.reify() |
| 419 | 498 |
| 420 def tearDown(self): | 499 def tearDown(self): |
| 421 self.repo.nuke() | 500 self.repo.nuke() |
| 422 super(GitRepoReadWriteTestBase, self).tearDown() | 501 super(GitRepoReadWriteTestBase, self).tearDown() |
| 502 |
| 503 def assertSchema(self, schema_string): |
| 504 self.assertEqual(GitRepoSchema(schema_string).simple_graph(), |
| 505 self.repo.to_schema().simple_graph()) |
| OLD | NEW |