Chromium Code Reviews

Unified Diff: client/cipd.py

Issue 2847153002: Cache/retrieve extracted CIPD packages in local isolate cache (Closed)
Patch Set: clean up formatting (created 3 years, 7 months ago)
Index: client/cipd.py
diff --git a/client/cipd.py b/client/cipd.py
index 6fb16324368a20f75f121cf4ecdbe24c402ca925..826398570a68d5e7cd8620f124b442378033e9a3 100644
--- a/client/cipd.py
+++ b/client/cipd.py
@@ -11,6 +11,8 @@ import logging
import optparse
import os
import platform
+import re
+import shutil
import sys
import tempfile
import time
@@ -139,13 +141,150 @@ class CipdClient(object):
self.package_name = package_name
self.instance_id = instance_id
self.service_url = service_url
+ self._cache_hash = hashlib.sha1
+ self._cache_hash_algo = (
+ isolated_format.SUPPORTED_ALGOS_REVERSE[self._cache_hash])
+
+ def _ensure_from_isolate(self, target_dir, cipd_isolated, isolate_cache):
+ """
+ Retrieves the CIPD subdirectories from the isolate cache, if they exist,
+ hardlinking or copying the files into the provided directory.
+
+ Does the opposite of _isolate_cipd for a given cipd_isolated file.
+
+ Args:
+ target_dir: directory in which to put the files.
+ cipd_isolated: the isolated.[hash] file created previously in
+ _isolate_cipd
+ isolate_cache: the isolateserver.DiskCache in which the files may be
+ stored.
+
+ Returns:
+ True if the isolated file and all the isolated contents were successfully
+ found in the isolate cache and put into place using hardlinks with a copy
+ fallback. False otherwise.
+ """
+ if not isolate_cache:
+      logging.info('Not ensuring cipd from isolate cache because '
+                   'isolate_cache is not defined: %s', isolate_cache)
+ return False
+ try:
+      with open(cipd_isolated, 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb' it makes it faster on windows.
kjlubick 2017/05/09 17:38:25 Done.
+ digest = str(f.read())
+ try:
+ content = isolate_cache.getfileobj(digest).read()
+ except Exception as e:
+ logging.warning('Could not find isolated file in cache with digest '
+ '%s: %s', digest, e)
+ return False
+
+ ifile = isolated_format.IsolatedFile(digest, self._cache_hash)
M-A Ruel 2017/05/08 20:43:16 This part should be a function not a method. I'm t
kjlubick 2017/05/09 17:38:25 Done.
+ ifile.load(content)
+
+ file_path.ensure_tree(target_dir)
+ files = ifile.data.get(u'files', {})
+ for f in files.keys():
M-A Ruel 2017/05/08 20:43:16 for f, props in files.iteritems():
kjlubick 2017/05/09 17:38:25 I can't - I get "Too many values to unpack" Comme
+ props = files.get(f, None)
+ if not props:
+ logging.warning('Problem getting info for %s', f)
+ return False
+ file_mode = props.get('m', None)
+ if file_mode:
+ # Ignore all bits apart from the user
+ file_mode &= 0700
+
+ dstpath = os.path.join(target_dir, f)
+ file_path.ensure_tree(os.path.dirname(dstpath))
+ digest = props.get('h', None)
+ if not digest:
+          logging.warning('Hash can\'t be empty for %s', f)
+ return False
+ srcpath = isolate_cache.getfileobj(digest).name
+
+ file_path.link_file(unicode(dstpath), unicode(srcpath),
+ file_path.HARDLINK_WITH_FALLBACK)
+
+ if file_mode is not None:
+ fs.chmod(dstpath, file_mode)
+ except Exception as e:
+ logging.warning('Could not ensure cipd package from isolate %s', e)
+ return False
+
+ return True
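For reference, the .isolated file this method loads is JSON whose shape can be inferred from the fields the code touches ('files', 'h', 'm' here; 'algo', 'files', 'version' in _isolate_cipd below). A minimal hand-written sketch of such a structure, with invented paths and digests:

  # Illustrative only; paths and digests are made up.
  data = {
      'algo': 'sha-1',
      'version': isolated_format.ISOLATED_FILE_VERSION,
      'files': {
          'bin/tool': {
              'h': '3fc9b689459d738f8c88a3a48aa9e33542016b7a',  # content digest
              'm': 0755,  # optional POSIX mode; only the user bits are kept above
          },
          'docs/readme.txt': {
              'h': 'da39a3ee5e6b4b0d3255bfef95601890afd80709',  # no 'm': mode untouched
          },
      },
  }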
+
+ def _isolate_cipd(self, root, subdirs, isolate_cache, cipd_cache):
kjlubick 2017/05/08 18:59:29 This doesn't support includes like we mentioned.
+ """
+ Puts the content of the CIPD subdirectories into the isolate cache,
+ creating a .isolated file representing each subdirectory. This .isolated
+ file goes into the isolate_cache as well, and a .isolated.[hash] file
+ goes into cipd_cache for extraction in _ensure_from_isolate(). The suffix
+    will be related to self._cache_hash_algo (e.g. .sha-1 for SHA-1).
+
+    This allows machines with slow disk I/O (e.g. Raspberry Pis) to avoid
+    re-extracting the CIPD zips every time, potentially saving a lot of time.
+
+    Args:
+ root: where packages are installed.
+ subdirs: dict of subdir -> name_verision where name_version is
M-A Ruel 2017/05/08 20:43:16 name_version
kjlubick 2017/05/09 17:38:25 Done.
+ [subdir].[pkg1version_pkg2version...].isolated.[hash] This way, if
+ any packages are updated, the cached files will also change.
+ isolate_cache: A isolateserver.DiskCache used to store files locally.
+ cipd_cache: A directory in which to put the *isolated.[hash] files.
+ """
+ if not isolate_cache or not os.path.isdir(cipd_cache):
+      logging.info('Not putting cipd into isolate cache because one of the '
+                   'caches is not usable: %s, %s', isolate_cache, cipd_cache)
+ return
+ for subdir, hashfile in subdirs.iteritems():
+ if not subdir:
+ logging.info('not caching to %s because it extracts to ./', hashfile)
+ continue
+ # The subdirs given to us from cipd are always in foo/bar/baz format
+ # which freaks Windows out.
+ subdir = subdir.replace('/', os.path.sep)
+ subdir = os.path.join(root, subdir)
+
+ if not os.path.isdir(subdir):
+ logging.warning('%r is not a directory, so it can\'t be isolated',
+ subdir)
+ continue
+
+ infiles, metadata = isolateserver.directory_to_metadata(
+ subdir, self._cache_hash, [], True)
+
+      # The .isolated file to be created on disk. hashfile is the companion
+      # file that will be created alongside it, e.g. *.isolated.sha-1 if the
+      # SHA-1 algorithm is used.
+      isolated_file = unicode(os.path.join(
+          cipd_cache, hashfile[:-(len(self._cache_hash_algo) + 1)]))
+ data = {
+ 'algo': self._cache_hash_algo,
+ 'files': metadata,
+ 'version': isolated_format.ISOLATED_FILE_VERSION,
+ }
+ isolated_format.save_isolated(isolated_file, data)
+
+ for infile in infiles:
+        with open(os.path.join(subdir, infile.path), 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb'
kjlubick 2017/05/09 17:38:25 Done.
+ isolate_cache.write(infile.digest, f)
+
+ digest = isolated_format.hash_file(isolated_file, self._cache_hash)
+      with open(isolated_file, 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb'
kjlubick 2017/05/09 17:38:25 Done
+ content = f.read()
+ isolate_cache.write(digest, content)
M-A Ruel 2017/05/08 20:43:16 you can calculate hash here, so the file is not re
kjlubick 2017/05/09 17:38:25 Done.
+
+ with open(os.path.join(cipd_cache, hashfile), 'w') as f:
+ f.write(digest)
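The review note above about calculating the hash points at hashing the in-memory content instead of re-reading the file from disk. A minimal sketch of that shape, keeping the surrounding code's assumption that the .isolated file fits in memory (self._cache_hash is hashlib.sha1):

  # Replaces the isolated_format.hash_file() call, which re-reads the file.
  with open(isolated_file, 'rb') as f:
    content = f.read()
  digest = self._cache_hash(content).hexdigest()
  isolate_cache.write(digest, content)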
def ensure(
- self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None):
+ self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None,
+ isolate_cache=None):
"""Ensures that packages installed in |site_root| equals |packages| set.
Blocking call.
+    Attempts to use the isolate cache to store the unzipped CIPD files, keeping
+    a .isolated file in the CIPD cache_dir.
+
Args:
site_root (str): where to install packages.
packages: dict of subdir -> list of (package_template, version) tuples.
@@ -153,6 +292,8 @@ class CipdClient(object):
Typically contains packages and tags.
tmp_dir (str): if not None, dir for temp files.
timeout (int): if not None, timeout in seconds for this function to run.
+ isolate_cache (isolateserver.DiskCache): if not None, CIPD assets will
+ be unzipped and stored in this disk cache and extracted from there.
Returns:
Pinned packages in the form of {subdir: [(package_name, package_id)]},
@@ -163,26 +304,49 @@ class CipdClient(object):
"""
timeoutfn = tools.sliding_timeout(timeout)
logging.info('Installing packages %r into %s', packages, site_root)
-
ensure_file_handle, ensure_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
json_out_file_handle, json_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
os.close(json_out_file_handle)
-
+ if cache_dir:
+ file_path.ensure_tree(unicode(cache_dir))
M-A Ruel 2017/05/08 20:43:16 I'd prefer to update call sites instead.
kjlubick 2017/05/09 17:38:25 Done. Only run_isolated needed it.
+ to_isolate = {}
+ from_isolate = {}
try:
try:
for subdir, pkgs in sorted(packages.iteritems()):
if '\n' in subdir:
raise Error(
'Could not install packages; subdir %r contains newline' % subdir)
+
+          # Join all the versions together so that a new cached isolated file
+          # is used if any of them change.
+ versions = [p[1] for p in pkgs]
M-A Ruel 2017/05/08 20:43:16 why not each package be individually packed?
kjlubick 2017/05/09 17:38:25 I had tried that originally, but that gets tricky
M-A Ruel 2017/05/10 13:31:58 Hummm I still think it's not a great idea. Since t
+ cipd_isolated = '%s.%s.isolated.%s' % (subdir, '_'.join(versions),
M-A Ruel 2017/05/08 20:43:16 u
kjlubick 2017/05/09 17:38:25 Done.
+ self._cache_hash_algo)
+ cipd_isolated = os.path.join(cache_dir, cipd_isolated)
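As a concrete illustration of the naming scheme (subdir and versions invented), with SHA-1 as the cache hash:

  # Hypothetical values for illustration:
  subdir = 'bin'
  versions = ['deadbeef', 'cafef00d']
  # -> cipd_isolated = 'bin.deadbeef_cafef00d.isolated.sha-1'

so changing any pinned version changes the cache file name, which invalidates the old cached entry.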
+ if (os.path.isfile(cipd_isolated) and
+ self._ensure_from_isolate(os.path.join(site_root, subdir),
+ cipd_isolated, isolate_cache)):
+ from_isolate[unicode(subdir)] = pkgs
+ continue
+ to_isolate[subdir] = cipd_isolated
os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,))
for pkg, version in pkgs:
pkg = render_package_name_template(pkg)
os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
+
finally:
os.close(ensure_file_handle)
+      # to_isolate holds the packages that we need to ensure from CIPD and
+      # then isolate. If it is empty, everything was successfully pulled from
+      # the isolate cache, so return from_isolate, the pinned packages that
+      # came from there.
+ if not to_isolate:
+ return from_isolate
+
cmd = [
self.binary_path, 'ensure',
'-root', site_root,
@@ -219,12 +383,16 @@ class CipdClient(object):
raise Error(
'Could not install packages; exit code %d\noutput:%s' % (
exit_code, '\n'.join(output)))
+
+ self._isolate_cipd(site_root, to_isolate, isolate_cache, cache_dir)
+
with open(json_file_path) as jfile:
result_json = json.load(jfile)
- return {
+ from_isolate.update({
subdir: [(x['package'], x['instance_id']) for x in pins]
for subdir, pins in result_json['result'].iteritems()
- }
+ })
+ return from_isolate
finally:
fs.remove(ensure_file_path)
fs.remove(json_file_path)