Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Unified Diff: infra/services/gsubtreed/gsubtreed.py

Issue 477623003: Add git subtree daemon service. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@add_dtree_support
Patch Set: test multiple runs, fix some logging silliness Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: infra/services/gsubtreed/gsubtreed.py
diff --git a/infra/services/gsubtreed/gsubtreed.py b/infra/services/gsubtreed/gsubtreed.py
new file mode 100644
index 0000000000000000000000000000000000000000..7891feab212a0ac550b84b65d9d52591f3fba9a2
--- /dev/null
+++ b/infra/services/gsubtreed/gsubtreed.py
@@ -0,0 +1,152 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import collections
+import logging
+import posixpath
+import sys
+
+from infra.libs.git2 import CalledProcessError
+from infra.libs.git2 import INVALID
+from infra.libs.git2 import config_ref
+from infra.libs.git2 import repo
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+################################################################################
+# ConfigRef
+################################################################################
+
+class GsubtreedConfigRef(config_ref.ConfigRef):
+ # Declares the gsubtreed configuration keys: CONVERT maps each key to a
+ # (self, val) coercion function and DEFAULTS maps each key to its fallback
+ # value. How these tables are consumed is defined by config_ref.ConfigRef,
+ # which is not visible here.
+ CONVERT = {
+ 'interval': lambda self, val: float(val),
+ 'subtree_synthesized_prefix': lambda self, val: str(val),
+ 'subtree_processed_prefix': lambda self, val: str(val),
+
+ # NOTE(review): str(None) is the truthy string 'None', so the
+ # `or self.repo.url` fallback only triggers for an empty value -- confirm
+ # whether the None default below is ever passed through this converter.
+ 'base_url': lambda self, val: str(val) or self.repo.url,
+ # NOTE(review): if this ever runs under Python 3, map() yields a one-shot
+ # iterator rather than a list -- confirm how the consumers iterate it.
+ 'enabled_refglobs': lambda self, val: map(str, list(val)),
+ # normpath to avoid trailing/double-slash errors.
+ 'enabled_paths': lambda self, val: map(posixpath.normpath, map(str, val)),
+ }
+ DEFAULTS = {
+ # presumably the delay between daemon iterations -- TODO confirm the unit
+ # against the caller; it is not used anywhere in this file.
+ 'interval': 5.0,
+ 'subtree_processed_prefix': 'refs/subtree-processed',
+ 'subtree_synthesized_prefix': 'refs/subtree-synthesized',
+ # e.g. while processing the subtree 'b/foo' on refs/heads/master
+ # refs/heads/master <- real commits
+ # refs/subtree-processed/b/foo/-/heads/master <- ancestor tag of master
Vadim Sh. 2014/08/17 19:37:59 Will this refs be created on a first use or some o
iannucci 2014/08/17 22:35:30 It'll do a push... normally that would create it,
+ # refs/subtree-synthesized/b/foo/-/heads/master <- ref with synth commits
+ # For the sake of implementation simplicity, this daemon assumes the
+ # googlesource.com guarantee of transactional multi-ref pushes within a
+ # single repo.
+
+ # base_url: process_path() joins this with each enabled path
+ # (posixpath.join(base_url, path)) to form the URL of that path's subtree
+ # repo.
+ 'base_url': None,
Vadim Sh. 2014/08/17 19:37:59 Document.
iannucci 2014/08/17 22:35:29 Done.
+ # enabled_refglobs: globs that process_path() passes to
+ # origin_repo.refglob() to select the refs to mirror.
+ 'enabled_refglobs': ['refs/heads/*'],
+ # enabled_paths: the paths inner_loop() hands to process_path(), one at a
+ # time. Empty by default, so nothing is processed until it is configured.
+ 'enabled_paths': [],
+ }
+ # presumably the ref the configuration is read from -- the lookup itself
+ # lives in config_ref.ConfigRef; confirm there.
+ REF = 'refs/gsubtreed-config/main'
+
+
+
+################################################################################
+# Core functionality
+################################################################################
+
+def process_path(path, origin_repo, config, fake_base_repo_url=None):
+ # Mirrors the history of the directory `path` in origin_repo into its own
+ # subtree repo.
+ #
+ # For every ref matching config['enabled_refglobs'], walks the commits
+ # yielded by processed.to(ref) and, whenever the tree at `path` differs from
+ # the last synthesized tree, creates a new synthesized commit whose tree is
+ # that directory. The synthesized commits are then pushed to the subtree
+ # repo, and the bookkeeping refs are pushed to origin_repo.
+ #
+ # Args:
+ # path: directory inside origin_repo to mirror.
+ # origin_repo: repo object for the source repository.
+ # config: mapping providing 'base_url', 'enabled_refglobs',
+ # 'subtree_processed_prefix' and 'subtree_synthesized_prefix'.
+ # fake_base_repo_url: if set, overrides config['base_url'] and makes the
+ # Cr-Mirrored-From footer read '[FAKE-BASE-URL]'.
+ #
+ # Returns:
+ # (success, synthed_count): success is False if either push raised;
+ # synthed_count is the number of commits synthesized during this call.
+ #
+ # NOTE(review): indentation is flattened in this view, so the exact nesting
+ # of the statements below (in particular the three *_push assignments near
+ # the end of the loops) must be confirmed against the real file.
+ def join(prefix, ref):
+ # Maps a ref such as refs/heads/master to
+ # <prefix>/<path>/-/heads/master and looks it up in origin_repo.
+ assert ref.ref.startswith('refs/')
+ ref = '/'.join((prefix, path)) + '/-/' + ref.ref[len('refs/'):]
+ return origin_repo[ref]
+
+ # ref -> commit mapping handed to origin_repo.fast_forward_push() below.
+ origin_push = {}
+
+ base_url = fake_base_repo_url or config['base_url']
Vadim Sh. 2014/08/17 19:37:59 can't you mock config['base_url'] in tests to get
iannucci 2014/08/17 22:35:30 Did a thing. ptal.
+ mirror_url = '[FAKE-BASE-URL]' if fake_base_repo_url else origin_repo.url
+
+ # The subtree repo lives at <base_url>/<path>; it reuses origin_repo's
+ # repos_dir and is reified with share_from=origin_repo.
+ subtree_repo = repo.Repo(posixpath.join(base_url, path))
+ subtree_repo.repos_dir = origin_repo.repos_dir
+ subtree_repo.reify(share_from=origin_repo)
Vadim Sh. 2014/08/17 19:37:59 what if repo at base_url is not initially dervied
iannucci 2014/08/17 22:35:30 Yes, it's safe. Object sharing only means that it'
+ subtree_repo.run('fetch', stdout=sys.stdout, stderr=sys.stderr)
+ # ref -> commit mapping handed to subtree_repo.fast_forward_push() below.
+ subtree_repo_push = {}
+
+ synthed_count = 0
+
+ for glob in config['enabled_refglobs']:
+ for ref in origin_repo.refglob(glob):
+ LOGGER.info('processing ref %s', ref)
+ processed = join(config['subtree_processed_prefix'], ref)
+ synthed = join(config['subtree_synthesized_prefix'], ref)
+
+ # Resume from the tip of the synthesized ref; synth_parent is compared
+ # against INVALID below to decide whether the next commit gets a parent.
+ synth_parent = synthed.commit
+ cur_tree = synthed.commit.data.tree
Vadim Sh. 2014/08/17 19:37:59 is it None if ref doesn't exist yet?
iannucci 2014/08/17 22:35:30 It's INVALID (and thus synthed.commit.data.tree is
+ LOGGER.info('starting with tree %r', cur_tree)
+
+ # presumably processed.to(ref) yields the commits after `processed` up
+ # to `ref`, oldest first -- semantics live in infra.libs.git2; confirm.
+ for commit in processed.to(ref):
+ LOGGER.info('processing commit %s', commit)
+ try:
+ # '<commit hash>:<path>' names the object at `path` in that commit.
+ obj_name = '{.hsh}:{}'.format(commit, path)
+ typ = origin_repo.run('cat-file', '-t', obj_name).strip()
+ if typ != 'tree':
+ # NOTE(review): Logger.warn is a deprecated alias of Logger.warning.
+ LOGGER.warn('path %r is not a tree in commit %s', path, commit)
+ continue
+ dir_tree = origin_repo.run('rev-parse', obj_name).strip()
+ except CalledProcessError:
+ # A failing git command skips this commit without logging; presumably
+ # this covers commits where `path` does not exist -- confirm.
+ continue
+
+ # Only synthesize a commit when the directory's tree actually changed.
+ if dir_tree != cur_tree:
+ LOGGER.info('found new tree %r', dir_tree)
+ cur_tree = dir_tree
+ synthed_count += 1
+ # The new commit is derived from the original via commit.alter(), with
+ # the parent list, tree and footers below supplied as overrides.
+ synth_parent = commit.alter(
+ parents=[synth_parent.hsh] if synth_parent is not INVALID else [],
+ tree=dir_tree,
+ footers=collections.OrderedDict([
+ ('Cr-Mirrored-From', [mirror_url]),
+ ('Cr-Mirrored-Commit', [commit.hsh]),
Vadim Sh. 2014/08/17 19:37:59 It will also inherit Cr-Commit-Position, right?
iannucci 2014/08/17 22:35:30 Yes, this is just adding footers to whatever's the
+ ('Cr-Mirrored-Subtree', [path]),
+ ]),
+ )
+ origin_push[synthed] = synth_parent
+ subtree_repo_push[subtree_repo[ref.ref]] = synth_parent
+
+ # Record the ref's current commit as processed.
+ origin_push[processed] = ref.commit
+
+ success = True
+ try:
+ # because the hashes are deterministic based on the real history, if the
+ # first push succeeds, but the second does not, it just means we'll end up
+ # doing a bit of extra work on the next loop, but correctness will still be
+ # ensured
+ subtree_repo.fast_forward_push(subtree_repo_push)
Vadim Sh. 2014/08/17 19:37:59 TODO: push to subtree repos in parallel. This can
iannucci 2014/08/17 22:35:29 Yeah I thought about it. I'll add a comment, but I
+ origin_repo.fast_forward_push(origin_push)
+ except Exception: # pragma: no cover
+ # Any push failure is logged and reported through the return value rather
+ # than propagated to the caller.
+ LOGGER.exception('Caught exception while pushing in process_path')
+ success = False
+
+ return success, synthed_count
+
+
+def inner_loop(origin_repo, config, fake_base_repo_url=None):
+ """Returns (success, {path: #commits_synthesized})."""
+ # Runs one pass of the daemon: fetches origin_repo, calls config.evaluate(),
+ # then calls process_path() for every entry in config['enabled_paths'].
+ # fake_base_repo_url is forwarded unchanged to process_path().
+
+ LOGGER.debug('fetching %r', origin_repo)
+ origin_repo.run('fetch', stdout=sys.stdout, stderr=sys.stderr)
+ # presumably re-reads the configuration after the fetch -- behavior is
+ # defined by config_ref.ConfigRef; confirm there.
+ config.evaluate()
+
+ success = True
+ processed = {}
+ for path in config['enabled_paths']:
+ LOGGER.info('processing path %s', path)
+ try:
+ path_success, num_synthed = process_path(path, origin_repo, config,
+ fake_base_repo_url)
+ # A single failing path makes the overall result False.
+ success = path_success and success
+ processed[path] = num_synthed
+ except Exception: # pragma: no cover
+ # The exception is logged and the loop moves on to the next path; a path
+ # whose process_path() call raised gets no entry in `processed`.
+ LOGGER.exception('Caught in inner_loop')
+ success = False
+
+ return success, processed

Powered by Google App Engine
This is Rietveld 408576698