OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import atexit | 5 import atexit |
6 import collections | 6 import collections |
7 import copy | 7 import copy |
8 import datetime | 8 import datetime |
9 import hashlib | 9 import hashlib |
10 import os | 10 import os |
11 import shutil | 11 import shutil |
12 import subprocess | 12 import subprocess |
| 13 import sys |
13 import tempfile | 14 import tempfile |
14 import unittest | 15 import unittest |
15 | 16 |
| 17 from cStringIO import StringIO |
| 18 |
16 | 19 |
def git_hash_data(data, typ='blob'):
  """Calculate the git-style SHA1 for some data.

  Git names an object by the SHA1 of '<type> <size>\\0<content>', where <size>
  is the length of the content in bytes.

  Only supports 'blob' type data at the moment.

  Args:
    data (bytes or text) - The blob content. Text is encoded as UTF-8 before
        hashing so that the size in the header is a byte count rather than a
        character count.
    typ (str) - The git object type. Must be 'blob'.

  Returns the 40 character hex digest for the blob.

  Throws an AssertionError if |typ| is anything other than 'blob'.
  """
  assert typ == 'blob', 'Only support blobs for now'
  if not isinstance(data, (bytes, bytearray)):
    # Text input: hash the encoded bytes, never the character sequence.
    data = data.encode('utf-8')
  header = ('blob %d\0' % len(data)).encode('ascii')
  return hashlib.sha1(header + bytes(data)).hexdigest()
24 | 27 |
25 | 28 |
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
152 | 155 |
153 for k in sorted(empty_keys): | 156 for k in sorted(empty_keys): |
154 yield self.COMMIT(k, self.par_map[k], | 157 yield self.COMMIT(k, self.par_map[k], |
155 not any(k in v for v in self.par_map.itervalues()), | 158 not any(k in v for v in self.par_map.itervalues()), |
156 is_root) | 159 is_root) |
157 del par_map[k] | 160 del par_map[k] |
158 for v in par_map.itervalues(): | 161 for v in par_map.itervalues(): |
159 v.difference_update(empty_keys) | 162 v.difference_update(empty_keys) |
160 is_root = False | 163 is_root = False |
161 | 164 |
| 165 def add_partial(self, commit, parent=None): |
| 166 if commit not in self.par_map: |
| 167 self.par_map[commit] = OrderedSet() |
| 168 if parent is not None: |
| 169 self.par_map[commit].add(parent) |
| 170 |
162 def add_commits(self, schema): | 171 def add_commits(self, schema): |
163 """Adds more commits from a schema into the existing Schema. | 172 """Adds more commits from a schema into the existing Schema. |
164 | 173 |
165 Args: | 174 Args: |
166 schema (str) - See class docstring for info on schema format. | 175 schema (str) - See class docstring for info on schema format. |
167 | 176 |
168 Throws an AssertionError if it detects a cycle. | 177 Throws an AssertionError if it detects a cycle. |
169 """ | 178 """ |
170 for commits in (l.split() for l in schema.splitlines() if l.strip()): | 179 for commits in (l.split() for l in schema.splitlines() if l.strip()): |
171 parent = None | 180 parent = None |
172 for commit in commits: | 181 for commit in commits: |
173 if commit not in self.par_map: | 182 self.add_partial(commit, parent) |
174 self.par_map[commit] = OrderedSet() | |
175 if parent is not None: | |
176 self.par_map[commit].add(parent) | |
177 parent = commit | 183 parent = commit |
178 if parent and not self.master: | 184 if parent and not self.master: |
179 self.master = parent | 185 self.master = parent |
180 for _ in self.walk(): # This will throw if there are any cycles. | 186 for _ in self.walk(): # This will throw if there are any cycles. |
181 pass | 187 pass |
182 | 188 |
183 def reify(self): | 189 def reify(self): |
184 """Returns a real GitRepo for this GitRepoSchema""" | 190 """Returns a real GitRepo for this GitRepoSchema""" |
185 return GitRepo(self) | 191 return GitRepo(self) |
186 | 192 |
187 def data_for(self, commit): | 193 def data_for(self, commit): |
188 """Obtains the data for |commit|. | 194 """Obtains the data for |commit|. |
189 | 195 |
190 See the docstring on the GitRepo class for the format of the returned data. | 196 See the docstring on the GitRepo class for the format of the returned data. |
191 | 197 |
192 Caches the result on this GitRepoSchema instance. | 198 Caches the result on this GitRepoSchema instance. |
193 """ | 199 """ |
194 if commit not in self.data_cache: | 200 if commit not in self.data_cache: |
195 self.data_cache[commit] = self.content_fn(commit) | 201 self.data_cache[commit] = self.content_fn(commit) |
196 return self.data_cache[commit] | 202 return self.data_cache[commit] |
197 | 203 |
| 204 def simple_graph(self): |
| 205 """Returns a dictionary of {commit_subject: {parent commit_subjects}} |
| 206 |
| 207 This allows you to get a very simple connection graph over the whole repo |
| 208 for comparison purposes. Only commit subjects (not ids, not content/data) |
| 209 are considered |
| 210 """ |
| 211 ret = {} |
| 212 for commit in self.walk(): |
| 213 ret.setdefault(commit.name, set()).update(commit.parents) |
| 214 return ret |
| 215 |
198 | 216 |
199 class GitRepo(object): | 217 class GitRepo(object): |
200 """Creates a real git repo for a GitRepoSchema. | 218 """Creates a real git repo for a GitRepoSchema. |
201 | 219 |
202 Obtains schema and content information from the GitRepoSchema. | 220 Obtains schema and content information from the GitRepoSchema. |
203 | 221 |
204 The format for the commit data supplied by GitRepoSchema.data_for is: | 222 The format for the commit data supplied by GitRepoSchema.data_for is: |
205 { | 223 { |
206 SPECIAL_KEY: special_value, | 224 SPECIAL_KEY: special_value, |
207 ... | 225 ... |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
246 GitRepo will automatically clean up all allocated repos at the exit of the | 264 GitRepo will automatically clean up all allocated repos at the exit of the |
247 program (assuming a normal exit like with sys.exit) | 265 program (assuming a normal exit like with sys.exit) |
248 | 266 |
249 Args: | 267 Args: |
250 schema - An instance of GitRepoSchema | 268 schema - An instance of GitRepoSchema |
251 """ | 269 """ |
252 self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR) | 270 self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR) |
253 self.commit_map = {} | 271 self.commit_map = {} |
254 self._date = datetime.datetime(1970, 1, 1) | 272 self._date = datetime.datetime(1970, 1, 1) |
255 | 273 |
| 274 self.to_schema_refs = ['--branches'] |
| 275 |
256 self.git('init') | 276 self.git('init') |
| 277 self.git('config', 'user.name', 'testcase') |
| 278 self.git('config', 'user.email', 'testcase@example.com') |
257 for commit in schema.walk(): | 279 for commit in schema.walk(): |
258 self._add_schema_commit(commit, schema.data_for(commit.name)) | 280 self._add_schema_commit(commit, schema.data_for(commit.name)) |
259 self.last_commit = self[commit.name] | 281 self.last_commit = self[commit.name] |
260 if schema.master: | 282 if schema.master: |
261 self.git('update-ref', 'master', self[schema.master]) | 283 self.git('update-ref', 'refs/heads/master', self[schema.master]) |
262 | 284 |
263 def __getitem__(self, commit_name): | 285 def __getitem__(self, commit_name): |
264 """Gets the hash of a commit by its schema name. | 286 """Gets the hash of a commit by its schema name. |
265 | 287 |
266 >>> r = GitRepo(GitRepoSchema('A B C')) | 288 >>> r = GitRepo(GitRepoSchema('A B C')) |
267 >>> r['B'] | 289 >>> r['B'] |
268 '7381febe1da03b09da47f009963ab7998a974935' | 290 '7381febe1da03b09da47f009963ab7998a974935' |
269 """ | 291 """ |
270 return self.commit_map[commit_name] | 292 return self.commit_map[commit_name] |
271 | 293 |
272 def _add_schema_commit(self, commit, data): | 294 def _add_schema_commit(self, commit, commit_data): |
273 data = data or {} | 295 commit_data = commit_data or {} |
274 | 296 |
275 if commit.parents: | 297 if commit.parents: |
276 parents = list(commit.parents) | 298 parents = list(commit.parents) |
277 self.git('checkout', '--detach', '-q', self[parents[0]]) | 299 self.git('checkout', '--detach', '-q', self[parents[0]]) |
278 if len(parents) > 1: | 300 if len(parents) > 1: |
279 self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]]) | 301 self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]]) |
280 else: | 302 else: |
281 self.git('checkout', '--orphan', 'root_%s' % commit.name) | 303 self.git('checkout', '--orphan', 'root_%s' % commit.name) |
282 self.git('rm', '-rf', '.') | 304 self.git('rm', '-rf', '.') |
283 | 305 |
284 env = {} | 306 env = self.get_git_commit_env(commit_data) |
285 for prefix in ('AUTHOR', 'COMMITTER'): | |
286 for suffix in ('NAME', 'EMAIL', 'DATE'): | |
287 singleton = '%s_%s' % (prefix, suffix) | |
288 key = getattr(self, singleton) | |
289 if key in data: | |
290 val = data[key] | |
291 else: | |
292 if suffix == 'DATE': | |
293 val = self._date | |
294 self._date += datetime.timedelta(days=1) | |
295 else: | |
296 val = getattr(self, 'DEFAULT_%s' % singleton) | |
297 env['GIT_%s' % singleton] = str(val) | |
298 | 307 |
299 for fname, file_data in data.iteritems(): | 308 for fname, file_data in commit_data.iteritems(): |
300 deleted = False | 309 deleted = False |
301 if 'data' in file_data: | 310 if 'data' in file_data: |
302 data = file_data.get('data') | 311 data = file_data.get('data') |
303 if data is None: | 312 if data is None: |
304 deleted = True | 313 deleted = True |
305 self.git('rm', fname) | 314 self.git('rm', fname) |
306 else: | 315 else: |
307 path = os.path.join(self.repo_path, fname) | 316 path = os.path.join(self.repo_path, fname) |
308 pardir = os.path.dirname(path) | 317 pardir = os.path.dirname(path) |
309 if not os.path.exists(pardir): | 318 if not os.path.exists(pardir): |
310 os.makedirs(pardir) | 319 os.makedirs(pardir) |
311 with open(path, 'wb') as f: | 320 with open(path, 'wb') as f: |
312 f.write(data) | 321 f.write(data) |
313 | 322 |
314 mode = file_data.get('mode') | 323 mode = file_data.get('mode') |
315 if mode and not deleted: | 324 if mode and not deleted: |
316 os.chmod(path, mode) | 325 os.chmod(path, mode) |
317 | 326 |
318 self.git('add', fname) | 327 self.git('add', fname) |
319 | 328 |
320 rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env) | 329 rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env) |
321 assert rslt.retcode == 0, 'Failed to commit %s' % str(commit) | 330 assert rslt.retcode == 0, 'Failed to commit %s' % str(commit) |
322 self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip() | 331 self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip() |
323 self.git('tag', 'tag_%s' % commit.name, self[commit.name]) | 332 self.git('tag', 'tag_%s' % commit.name, self[commit.name]) |
324 if commit.is_branch: | 333 if commit.is_branch: |
325 self.git('branch', '-f', 'branch_%s' % commit.name, self[commit.name]) | 334 self.git('branch', '-f', 'branch_%s' % commit.name, self[commit.name]) |
326 | 335 |
| 336 def get_git_commit_env(self, commit_data=None): |
| 337 commit_data = commit_data or {} |
| 338 env = {} |
| 339 for prefix in ('AUTHOR', 'COMMITTER'): |
| 340 for suffix in ('NAME', 'EMAIL', 'DATE'): |
| 341 singleton = '%s_%s' % (prefix, suffix) |
| 342 key = getattr(self, singleton) |
| 343 if key in commit_data: |
| 344 val = commit_data[key] |
| 345 else: |
| 346 if suffix == 'DATE': |
| 347 val = self._date |
| 348 self._date += datetime.timedelta(days=1) |
| 349 else: |
| 350 val = getattr(self, 'DEFAULT_%s' % singleton) |
| 351 env['GIT_%s' % singleton] = str(val) |
| 352 return env |
| 353 |
| 354 |
327 def git(self, *args, **kwargs): | 355 def git(self, *args, **kwargs): |
328 """Runs a git command specified by |args| in this repo.""" | 356 """Runs a git command specified by |args| in this repo.""" |
329 assert self.repo_path is not None | 357 assert self.repo_path is not None |
330 try: | 358 try: |
331 with open(os.devnull, 'wb') as devnull: | 359 with open(os.devnull, 'wb') as devnull: |
332 output = subprocess.check_output( | 360 output = subprocess.check_output( |
333 ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs) | 361 ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs) |
334 return self.COMMAND_OUTPUT(0, output) | 362 return self.COMMAND_OUTPUT(0, output) |
335 except subprocess.CalledProcessError as e: | 363 except subprocess.CalledProcessError as e: |
336 return self.COMMAND_OUTPUT(e.returncode, e.output) | 364 return self.COMMAND_OUTPUT(e.returncode, e.output) |
337 | 365 |
| 366 def git_commit(self, message): |
| 367 return self.git('commit', '-am', message, env=self.get_git_commit_env()) |
| 368 |
338 def nuke(self): | 369 def nuke(self): |
339 """Obliterates the git repo on disk. | 370 """Obliterates the git repo on disk. |
340 | 371 |
341 Causes this GitRepo to be unusable. | 372 Causes this GitRepo to be unusable. |
342 """ | 373 """ |
343 shutil.rmtree(self.repo_path) | 374 shutil.rmtree(self.repo_path) |
344 self.repo_path = None | 375 self.repo_path = None |
345 | 376 |
346 def run(self, fn, *args, **kwargs): | 377 def run(self, fn, *args, **kwargs): |
347 """Run a python function with the given args and kwargs with the cwd set to | 378 """Run a python function with the given args and kwargs with the cwd set to |
348 the git repo.""" | 379 the git repo.""" |
349 assert self.repo_path is not None | 380 assert self.repo_path is not None |
350 curdir = os.getcwd() | 381 curdir = os.getcwd() |
351 try: | 382 try: |
352 os.chdir(self.repo_path) | 383 os.chdir(self.repo_path) |
353 return fn(*args, **kwargs) | 384 return fn(*args, **kwargs) |
354 finally: | 385 finally: |
355 os.chdir(curdir) | 386 os.chdir(curdir) |
356 | 387 |
| 388 def capture_stdio(self, fn, *args, **kwargs): |
| 389 """Run a python function with the given args and kwargs with the cwd set to |
| 390 the git repo. |
| 391 |
| 392 Returns the (stdout, stderr) of whatever ran, instead of the what |fn| |
| 393 returned. |
| 394 """ |
| 395 stdout = sys.stdout |
| 396 stderr = sys.stderr |
| 397 try: |
| 398 sys.stdout = StringIO() |
| 399 sys.stderr = StringIO() |
| 400 try: |
| 401 self.run(fn, *args, **kwargs) |
| 402 except SystemExit: |
| 403 pass |
| 404 return sys.stdout.getvalue(), sys.stderr.getvalue() |
| 405 finally: |
| 406 sys.stdout = stdout |
| 407 sys.stderr = stderr |
| 408 |
| 409 def open(self, path, mode='rb'): |
| 410 return open(os.path.join(self.repo_path, path), mode) |
| 411 |
| 412 def to_schema(self): |
| 413 lines = self.git('rev-list', '--parents', '--reverse', '--topo-order', |
| 414 '--format=%s', *self.to_schema_refs).stdout.splitlines() |
| 415 hash_to_msg = {} |
| 416 ret = GitRepoSchema() |
| 417 current = None |
| 418 parents = [] |
| 419 for line in lines: |
| 420 if line.startswith('commit'): |
| 421 assert current is None |
| 422 tokens = line.split() |
| 423 current, parents = tokens[1], tokens[2:] |
| 424 assert all(p in hash_to_msg for p in parents) |
| 425 else: |
| 426 assert current is not None |
| 427 hash_to_msg[current] = line |
| 428 ret.add_partial(line) |
| 429 for parent in parents: |
| 430 ret.add_partial(line, hash_to_msg[parent]) |
| 431 current = None |
| 432 parents = [] |
| 433 assert current is None |
| 434 return ret |
| 435 |
357 | 436 |
class GitRepoSchemaTestBase(unittest.TestCase):
  """A TestCase which builds a GitRepoSchema for its subclasses.

  Subclasses must set the class variable REPO_SCHEMA to a GitRepoSchema string
  in the form described by that class. The resulting schema is made available
  as the class variable r_schema.

  Class variables named COMMIT_%(commit_name)s may also be set; they supply the
  content for the commit with the given commit_name.

  You probably will end up using either GitRepoReadOnlyTestBase or
  GitRepoReadWriteTestBase for real tests.
  """
  REPO_SCHEMA = None

  @classmethod
  def getRepoContent(cls, commit):
    """Returns the COMMIT_<commit> class variable, or None if it isn't set."""
    attr_name = 'COMMIT_%s' % commit
    return getattr(cls, attr_name, None)

  @classmethod
  def setUpClass(cls):
    super(GitRepoSchemaTestBase, cls).setUpClass()
    schema_string = cls.REPO_SCHEMA
    assert schema_string is not None
    cls.r_schema = GitRepoSchema(schema_string, cls.getRepoContent)
381 | 460 |
382 | 461 |
class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase):
  """Provides subclasses with one GitRepo shared by all of their tests.

  The GitRepo is built from the schema and content of GitRepoSchemaTestBase
  and appears as self.repo. It is created once before the tests of the
  subclass run and nuked once after they have all finished. Before every test
  the repo's last commit is force-checked-out.
  """
  REPO_SCHEMA = None

  @classmethod
  def setUpClass(cls):
    super(GitRepoReadOnlyTestBase, cls).setUpClass()
    assert cls.REPO_SCHEMA is not None
    shared_repo = cls.r_schema.reify()
    cls.repo = shared_repo

  def setUp(self):
    repo = self.repo
    repo.git('checkout', '-f', repo.last_commit)

  @classmethod
  def tearDownClass(cls):
    cls.repo.nuke()
    super(GitRepoReadOnlyTestBase, cls).tearDownClass()
405 | 484 |
406 | 485 |
class GitRepoReadWriteTestBase(GitRepoSchemaTestBase):
  """Provides every test of a subclass with its own fresh GitRepo.

  The GitRepo is built from the schema and content of GitRepoSchemaTestBase
  and appears as self.repo. It is created in setUp and nuked in tearDown, so
  changes one test makes to the repo are never seen by another.
  """
  REPO_SCHEMA = None

  def setUp(self):
    super(GitRepoReadWriteTestBase, self).setUp()
    fresh_repo = self.r_schema.reify()
    self.repo = fresh_repo

  def tearDown(self):
    self.repo.nuke()
    super(GitRepoReadWriteTestBase, self).tearDown()

  def assertSchema(self, schema_string):
    """Asserts that the repo's commit graph matches |schema_string|.

    Both sides are compared via simple_graph().
    """
    expected_graph = GitRepoSchema(schema_string).simple_graph()
    actual_graph = self.repo.to_schema().simple_graph()
    self.assertEqual(expected_graph, actual_graph)
OLD | NEW |