Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(218)

Unified Diff: client/cipd.py

Issue 2847153002: Cache/retrieve extracted CIPD packages in local isolate cache (Closed)
Patch Set: Cache cipd packages individually (for peak freshness) Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/swarming/server/bot_archive.py ('k') | client/isolated_format.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: client/cipd.py
diff --git a/client/cipd.py b/client/cipd.py
index 6fb16324368a20f75f121cf4ecdbe24c402ca925..d12616bbe31ea872f6a99ee2ff941c6f08b1b3ba 100644
--- a/client/cipd.py
+++ b/client/cipd.py
@@ -11,6 +11,8 @@ import logging
import optparse
import os
import platform
+import re
+import shutil
import sys
import tempfile
import time
@@ -139,13 +141,20 @@ class CipdClient(object):
self.package_name = package_name
self.instance_id = instance_id
self.service_url = service_url
+ self._cache_hash = hashlib.sha1
+ self._cache_hash_algo = (
+ isolated_format.SUPPORTED_ALGOS_REVERSE[self._cache_hash])
def ensure(
- self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None):
+ self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None,
+ isolate_cache=None):
"""Ensures that packages installed in |site_root| equals |packages| set.
Blocking call.
+  Attempts to use the isolate cache to store the unzipped CIPD files, keeping
+  a .isolated file in the CIPD cache_dir.
+
Args:
site_root (str): where to install packages.
packages: dict of subdir -> list of (package_template, version) tuples.
@@ -153,6 +162,8 @@ class CipdClient(object):
Typically contains packages and tags.
tmp_dir (str): if not None, dir for temp files.
timeout (int): if not None, timeout in seconds for this function to run.
+ isolate_cache (isolateserver.DiskCache): if not None, CIPD assets will
+ be unzipped and stored in this disk cache and extracted from there.
Returns:
Pinned packages in the form of {subdir: [(package_name, package_id)]},
@@ -163,29 +174,50 @@ class CipdClient(object):
"""
timeoutfn = tools.sliding_timeout(timeout)
logging.info('Installing packages %r into %s', packages, site_root)
-
ensure_file_handle, ensure_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
json_out_file_handle, json_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
os.close(json_out_file_handle)
-
+ to_isolate = {}
+ from_isolate = {}
try:
try:
for subdir, pkgs in sorted(packages.iteritems()):
if '\n' in subdir:
raise Error(
'Could not install packages; subdir %r contains newline' % subdir)
- os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,))
+
for pkg, version in pkgs:
pkg = render_package_name_template(pkg)
- os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
+
+ cipd_isolated = u'%s.%s.isolated.%s' % (pkg, version,
+ self._cache_hash_algo)
+ cipd_isolated = os.path.join(cache_dir, cipd_isolated)
+ if (self._ensure_from_isolate(os.path.join(site_root, subdir),
+ cipd_isolated, isolate_cache)):
+ from_isolate.setdefault(subdir, []).append((pkg, version))
+ else:
+ # we will need to pull it from cipd
+ to_isolate[pkg] = cipd_isolated
+ os.write(ensure_file_handle, '@Subdir %s\n' % pkg)
M-A Ruel 2017/05/10 20:00:57 One problem with this code is that the cipd packag
kjlubick 2017/05/10 20:20:59 I'm not quite sure what you mean by "cipd package
+ os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
+
finally:
os.close(ensure_file_handle)
+      # to_isolate holds the packages that we still need to fetch via CIPD
+      # and then isolate. If it is empty, every package was successfully
+      # pulled from the isolate cache, so return from_isolate, the pinned
+      # packages that were restored from it.
+      if not to_isolate:
+        return from_isolate
+
+ # call ensure once to put cipd packages in temp dir for caching
+ temp_root = tempfile.mkdtemp()
M-A Ruel 2017/05/10 20:00:57 prefix=u'cpid' this directory is never deleted?
kjlubick 2017/05/10 20:20:59 Done.
cmd = [
self.binary_path, 'ensure',
- '-root', site_root,
+ '-root', temp_root,
'-ensure-file', ensure_file_path,
'-verbose', # this is safe because cipd-ensure does not print a lot
'-json-output', json_file_path,
@@ -219,16 +251,154 @@ class CipdClient(object):
raise Error(
'Could not install packages; exit code %d\noutput:%s' % (
exit_code, '\n'.join(output)))
- with open(json_file_path) as jfile:
- result_json = json.load(jfile)
- return {
- subdir: [(x['package'], x['instance_id']) for x in pins]
- for subdir, pins in result_json['result'].iteritems()
- }
+
+ # isolate them
+ self._isolate_cipd(temp_root, to_isolate, isolate_cache, cache_dir)
+
+ # pull them out of isolate into their location
+ for subdir, pkgs in sorted(packages.iteritems()):
+
M-A Ruel 2017/05/10 20:00:57 remove empty line
kjlubick 2017/05/10 20:20:59 Done.
+ for pkg, version in pkgs:
+ pkg = render_package_name_template(pkg)
+ if pkg not in to_isolate:
+ # The package was already pulled from cache earlier.
+ continue
+
+ cipd_isolated = u'%s.%s.isolated.%s' % (pkg, version,
+ self._cache_hash_algo)
+ cipd_isolated = os.path.join(cache_dir, cipd_isolated)
+ if (self._ensure_from_isolate(os.path.join(site_root, subdir),
M-A Ruel 2017/05/10 20:00:57 wrapping () not necessary. I'd prefer to reverse
kjlubick 2017/05/10 20:20:59 Done.
+ cipd_isolated, isolate_cache)):
+ from_isolate.setdefault(subdir, []).append((pkg, version))
+ else:
+ raise Error('cipd package %s not able to be pulled from isolate '
+ 'cache after being put there' % cipd_isolated)
+
+ return from_isolate
finally:
fs.remove(ensure_file_path)
fs.remove(json_file_path)
+ def _ensure_from_isolate(self, target_dir, cipd_isolated, isolate_cache):
+ """Retrieves the CIPD packages from the isolate cache, if they exist.
+
+ This hardlinks or copies the files into the provided directory. It
+ basically does the opposite of _isolate_cipd for a given cipd_isolated
+ file.
+
+ Args:
+ target_dir: directory in which to put the files
+ cipd_isolated: the isolated.[hash] file created previously in
+ _isolate_cipd
+ isolate_cache: the isolateserver.DiskCache in which the files may be
+ stored
+
+ Returns:
+ True if the isolated file and all the isolated contents were successfully
+ found in the isolate cache and put into place using hardlinks with a copy
+ fallback. False otherwise.
+ """
+ if not os.path.isfile(cipd_isolated):
+ logging.info('Not ensuring cipd from isolate cache cipd_isolated %s is '
+ 'missing', cipd_isolated)
+ return False
+ if not isolate_cache:
+ logging.info('Not ensuring cipd from isolate cache isolate_cache is not'
+ 'defined: %s', isolate_cache)
+ return False
+ try:
+ with open(cipd_isolated , 'rb') as f:
+ digest = f.read()
+ try:
+ content = isolate_cache.getfileobj(digest).read()
M-A Ruel 2017/05/10 20:00:56 with isolate_cache.getfileobj(digest) as f: cont
kjlubick 2017/05/10 20:20:59 Done.
+ except Exception as e:
M-A Ruel 2017/05/10 20:00:56 too broad, you want isolateserver.CacheMiss
kjlubick 2017/05/10 20:21:00 Done.
+ logging.warning('Could not find isolated file in cache with digest '
+ '%s: %s', digest, e)
+ return False
+
+ ifile = isolated_format.IsolatedFile(digest, self._cache_hash)
+ ifile.load(content)
+
+ if not isolateserver.is_cached(ifile, isolate_cache):
+ logging.info('Cached CIPD asset(s) %s are incomplete', cipd_isolated)
+ return False
+
+ file_path.ensure_tree(target_dir)
+ return isolateserver.extract(ifile, target_dir, isolate_cache)
+ except OSError as e:
M-A Ruel 2017/05/10 20:00:57 I'm wondering; it would apply to lines 310 and 311
kjlubick 2017/05/10 20:20:59 Good point. I think I had it for debugging asserts
+ logging.warning('Could not ensure cipd package from isolate %s', e)
+ return False
+
+ return True
+
+ def _isolate_cipd(self, root, pkgs, isolate_cache, cipd_cache):
+ """Puts the content of the CIPD subdirectories into the isolate cache.
+
+ This creates a .isolated file representing each subdirectory. This .isolated
+ file goes into the isolate_cache as well, and a .isolated.[hash] file
+ goes into cipd_cache for extraction in _ensure_from_isolate(). The suffix
+ will be related to self._cache_hash_algo (.sha-1 for SHA-1, etc)
+
+ This allows for disks with slow I/O (e.g. Raspberry Pis) to not have to
+ re-extract the CIPD zips every time, potentially saving a bunch of time.
+
+ It is assumed that the cipd packages have been extracted to root/name.
+
+ Arg:
+ root: where packages are installed
+ pkgs: dict of name -> isolated_hash where isolated_hash is
+ [name].[version].isolated.[hash] This is the file to use as the
+ .isolated and its corresponding hash.
+ isolate_cache: A isolateserver.DiskCache used to store files locally
+ cipd_cache: A directory in which to put the *isolated.[hash] files
+ """
+ if not isolate_cache or not os.path.isdir(cipd_cache):
+ logging.info('Not putting cipd into isolate cache because one of the'
+ 'caches is empty: %s, %s', isolate_cache, cipd_cache)
+ return
+ for pkg_dir, hashfile in pkgs.iteritems():
+ if not pkg_dir:
+ logging.info('not caching to %s because it extracts to ./', hashfile)
+ continue
+ # The pkgs given to us from cipd are always in foo/bar/baz format
+ # which freaks Windows out.
+ pkg_dir = pkg_dir.replace('/', os.path.sep)
+ pkg_dir = unicode(os.path.join(root, pkg_dir))
+
+ if not os.path.isdir(pkg_dir):
+ logging.warning('%r is not a directory, so it can\'t be isolated',
+ pkg_dir)
+ continue
+
+ infiles, metadata = isolateserver.directory_to_metadata(
+ pkg_dir, self._cache_hash, [], True)
+
+ # The .isolated file to be created on disk. hashfile represents the
+ # file that will also be created along with this.
+ # e.g. *.isolated.sha-1 if the sha1 algorithm is used
+ isolated_file = unicode(os.path.join(cipd_cache,
M-A Ruel 2017/05/10 20:00:57 inconsistent argument wrapping
kjlubick 2017/05/10 20:20:59 Done.
+ hashfile[:-1-len(self._cache_hash_algo)]))
+ data = {
+ 'algo': self._cache_hash_algo,
+ 'files': metadata,
+ 'version': isolated_format.ISOLATED_FILE_VERSION,
+ }
+ # For packages like skia/bots/foo the isolated_file will end up in
+ # the skia/bots directory, which may not exist.
+ file_path.ensure_tree(os.path.dirname(isolated_file))
+ isolated_format.save_isolated(isolated_file, data)
+
+ for infile in infiles:
+ with open(os.path.join(pkg_dir, infile.path) , 'rb') as f:
+ isolate_cache.write(infile.digest, f)
+
+ with open(isolated_file , 'rb') as f:
+ content = f.read()
+ digest = self._cache_hash(content).hexdigest()
M-A Ruel 2017/05/10 20:00:57 this can be outside the closure
kjlubick 2017/05/10 20:20:59 Done.
+ isolate_cache.write(digest, content)
+
+ with open(os.path.join(cipd_cache, hashfile), 'w') as f:
M-A Ruel 2017/05/10 20:00:56 'wb'
kjlubick 2017/05/10 20:21:00 Done.
+ f.write(digest)
def get_platform():
"""Returns ${platform} parameter value.
« no previous file with comments | « appengine/swarming/server/bot_archive.py ('k') | client/isolated_format.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698