Chromium Code Reviews

Unified Diff: client/cipd.py

Issue 2847153002: Cache/retrieve extracted CIPD packages in local isolate cache (Closed)
Patch Set: clean up formatting (created 3 years, 7 months ago)
Index: client/cipd.py
diff --git a/client/cipd.py b/client/cipd.py
index 6fb16324368a20f75f121cf4ecdbe24c402ca925..826398570a68d5e7cd8620f124b442378033e9a3 100644
--- a/client/cipd.py
+++ b/client/cipd.py
@@ -11,6 +11,8 @@ import logging
import optparse
import os
import platform
+import re
+import shutil
import sys
import tempfile
import time
@@ -139,13 +141,150 @@ class CipdClient(object):
self.package_name = package_name
self.instance_id = instance_id
self.service_url = service_url
+ self._cache_hash = hashlib.sha1
+ self._cache_hash_algo = (
+ isolated_format.SUPPORTED_ALGOS_REVERSE[self._cache_hash])
+
+ def _ensure_from_isolate(self, target_dir, cipd_isolated, isolate_cache):
+ """
+ Retrieves the CIPD subdirectories from the isolate cache, if they exist,
+ hardlinking or copying the files into the provided directory.
+
+ Does the opposite of _isolate_cipd for a given cipd_isolated file.
+
+ Args:
+ target_dir: directory in which to put the files.
+ cipd_isolated: the isolated.[hash] file created previously in
+ _isolate_cipd
+ isolate_cache: the isolateserver.DiskCache in which the files may be
+ stored.
+
+ Returns:
+ True if the isolated file and all the isolated contents were successfully
+ found in the isolate cache and put into place using hardlinks with a copy
+ fallback. False otherwise.
+ """
+ if not isolate_cache:
+      logging.info('Not ensuring cipd from isolate cache because '
+                   'isolate_cache is not defined: %s', isolate_cache)
+ return False
+ try:
+      with open(cipd_isolated, 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb' it makes it faster on windows.
kjlubick 2017/05/09 17:38:25 Done.
+ digest = str(f.read())
+ try:
+ content = isolate_cache.getfileobj(digest).read()
+ except Exception as e:
+ logging.warning('Could not find isolated file in cache with digest '
+ '%s: %s', digest, e)
+ return False
+
+ ifile = isolated_format.IsolatedFile(digest, self._cache_hash)
M-A Ruel 2017/05/08 20:43:16 This part should be a function not a method. I'm t
kjlubick 2017/05/09 17:38:25 Done.
+ ifile.load(content)
+
+ file_path.ensure_tree(target_dir)
+ files = ifile.data.get(u'files', {})
+ for f in files.keys():
M-A Ruel 2017/05/08 20:43:16 for f, props in files.iteritems():
kjlubick 2017/05/09 17:38:25 I can't - I get "Too many values to unpack" Comme
+ props = files.get(f, None)
+ if not props:
+ logging.warning('Problem getting info for %s', f)
+ return False
+ file_mode = props.get('m', None)
+ if file_mode:
+ # Ignore all bits apart from the user
+ file_mode &= 0700
+
+ dstpath = os.path.join(target_dir, f)
+ file_path.ensure_tree(os.path.dirname(dstpath))
+ digest = props.get('h', None)
+ if not digest:
+          logging.warning('Hash can\'t be empty for %s', f)
+ return False
+ srcpath = isolate_cache.getfileobj(digest).name
+
+ file_path.link_file(unicode(dstpath), unicode(srcpath),
+ file_path.HARDLINK_WITH_FALLBACK)
+
+ if file_mode is not None:
+ fs.chmod(dstpath, file_mode)
+ except Exception as e:
+ logging.warning('Could not ensure cipd package from isolate %s', e)
+ return False
+
+ return True
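For reference, the .isolated file this method loads is JSON whose shape can be inferred from the fields the code touches ('files', 'h', 'm' here; 'algo', 'files', 'version' in _isolate_cipd below). A minimal hand-written sketch of such a structure, with invented paths and digests:

  # Illustrative only; paths and digests are made up.
  data = {
      'algo': 'sha-1',
      'version': isolated_format.ISOLATED_FILE_VERSION,
      'files': {
          'bin/tool': {
              'h': '3fc9b689459d738f8c88a3a48aa9e33542016b7a',  # content digest
              'm': 0755,  # optional POSIX mode; only the user bits are kept above
          },
          'docs/readme.txt': {
              'h': 'da39a3ee5e6b4b0d3255bfef95601890afd80709',  # no 'm': mode untouched
          },
      },
  }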
+
+ def _isolate_cipd(self, root, subdirs, isolate_cache, cipd_cache):
kjlubick 2017/05/08 18:59:29 This doesn't support includes like we mentioned.
+ """
+ Puts the content of the CIPD subdirectories into the isolate cache,
+ creating a .isolated file representing each subdirectory. This .isolated
+ file goes into the isolate_cache as well, and a .isolated.[hash] file
+ goes into cipd_cache for extraction in _ensure_from_isolate(). The suffix
+    will be related to self._cache_hash_algo (e.g. .sha-1 for SHA-1).
+
+    This allows machines with slow disk I/O (e.g. Raspberry Pis) to avoid
+    re-extracting the CIPD zips every time, potentially saving a lot of time.
+
+    Args:
+ root: where packages are installed.
+ subdirs: dict of subdir -> name_verision where name_version is
M-A Ruel 2017/05/08 20:43:16 name_version
kjlubick 2017/05/09 17:38:25 Done.
+ [subdir].[pkg1version_pkg2version...].isolated.[hash] This way, if
+ any packages are updated, the cached files will also change.
+ isolate_cache: A isolateserver.DiskCache used to store files locally.
+ cipd_cache: A directory in which to put the *isolated.[hash] files.
+ """
+ if not isolate_cache or not os.path.isdir(cipd_cache):
+      logging.info('Not putting cipd into isolate cache because one of the '
+                   'caches is not usable: %s, %s', isolate_cache, cipd_cache)
+ return
+ for subdir, hashfile in subdirs.iteritems():
+ if not subdir:
+ logging.info('not caching to %s because it extracts to ./', hashfile)
+ continue
+ # The subdirs given to us from cipd are always in foo/bar/baz format
+ # which freaks Windows out.
+ subdir = subdir.replace('/', os.path.sep)
+ subdir = os.path.join(root, subdir)
+
+ if not os.path.isdir(subdir):
+ logging.warning('%r is not a directory, so it can\'t be isolated',
+ subdir)
+ continue
+
+ infiles, metadata = isolateserver.directory_to_metadata(
+ subdir, self._cache_hash, [], True)
+
+      # The .isolated file to be created on disk. hashfile is the companion
+      # file that will be created alongside it, e.g. *.isolated.sha-1 if the
+      # SHA-1 algorithm is used.
+      isolated_file = unicode(os.path.join(
+          cipd_cache, hashfile[:-(len(self._cache_hash_algo) + 1)]))
+ data = {
+ 'algo': self._cache_hash_algo,
+ 'files': metadata,
+ 'version': isolated_format.ISOLATED_FILE_VERSION,
+ }
+ isolated_format.save_isolated(isolated_file, data)
+
+ for infile in infiles:
+        with open(os.path.join(subdir, infile.path), 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb'
kjlubick 2017/05/09 17:38:25 Done.
+ isolate_cache.write(infile.digest, f)
+
+ digest = isolated_format.hash_file(isolated_file, self._cache_hash)
+      with open(isolated_file, 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb'
kjlubick 2017/05/09 17:38:25 Done
+ content = f.read()
+ isolate_cache.write(digest, content)
M-A Ruel 2017/05/08 20:43:16 you can calculate hash here, so the file is not re
kjlubick 2017/05/09 17:38:25 Done.
+
+ with open(os.path.join(cipd_cache, hashfile), 'w') as f:
+ f.write(digest)
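The review note above about calculating the hash points at hashing the in-memory content instead of re-reading the file from disk. A minimal sketch of that shape, keeping the surrounding code's assumption that the .isolated file fits in memory (self._cache_hash is hashlib.sha1):

  # Replaces the isolated_format.hash_file() call, which re-reads the file.
  with open(isolated_file, 'rb') as f:
    content = f.read()
  digest = self._cache_hash(content).hexdigest()
  isolate_cache.write(digest, content)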
def ensure(
- self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None):
+ self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None,
+ isolate_cache=None):
"""Ensures that packages installed in |site_root| equals |packages| set.
Blocking call.
+    Attempts to use the isolate cache to store the unzipped CIPD files, keeping
+    a .isolated file in the CIPD cache_dir.
+
Args:
site_root (str): where to install packages.
packages: dict of subdir -> list of (package_template, version) tuples.
@@ -153,6 +292,8 @@ class CipdClient(object):
Typically contains packages and tags.
tmp_dir (str): if not None, dir for temp files.
timeout (int): if not None, timeout in seconds for this function to run.
+ isolate_cache (isolateserver.DiskCache): if not None, CIPD assets will
+ be unzipped and stored in this disk cache and extracted from there.
Returns:
Pinned packages in the form of {subdir: [(package_name, package_id)]},
@@ -163,26 +304,49 @@ class CipdClient(object):
"""
timeoutfn = tools.sliding_timeout(timeout)
logging.info('Installing packages %r into %s', packages, site_root)
-
ensure_file_handle, ensure_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
json_out_file_handle, json_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
os.close(json_out_file_handle)
-
+ if cache_dir:
+ file_path.ensure_tree(unicode(cache_dir))
M-A Ruel 2017/05/08 20:43:16 I'd prefer to update call sites instead.
kjlubick 2017/05/09 17:38:25 Done. Only run_isolated needed it.
+ to_isolate = {}
+ from_isolate = {}
try:
try:
for subdir, pkgs in sorted(packages.iteritems()):
if '\n' in subdir:
raise Error(
'Could not install packages; subdir %r contains newline' % subdir)
+
+          # Join all the versions together so that a new cached isolated file
+          # is used if any of them change.
+ versions = [p[1] for p in pkgs]
M-A Ruel 2017/05/08 20:43:16 why not each package be individually packed?
kjlubick 2017/05/09 17:38:25 I had tried that originally, but that gets tricky
M-A Ruel 2017/05/10 13:31:58 Hummm I still think it's not a great idea. Since t
+ cipd_isolated = '%s.%s.isolated.%s' % (subdir, '_'.join(versions),
M-A Ruel 2017/05/08 20:43:16 u
kjlubick 2017/05/09 17:38:25 Done.
+ self._cache_hash_algo)
+ cipd_isolated = os.path.join(cache_dir, cipd_isolated)
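As a concrete illustration of the naming scheme (subdir and versions invented), with SHA-1 as the cache hash:

  # Hypothetical values for illustration:
  subdir = 'bin'
  versions = ['deadbeef', 'cafef00d']
  # -> cipd_isolated = 'bin.deadbeef_cafef00d.isolated.sha-1'

so changing any pinned version changes the cache file name, which invalidates the old cached entry.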
+ if (os.path.isfile(cipd_isolated) and
+ self._ensure_from_isolate(os.path.join(site_root, subdir),
+ cipd_isolated, isolate_cache)):
+ from_isolate[unicode(subdir)] = pkgs
+ continue
+ to_isolate[subdir] = cipd_isolated
os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,))
for pkg, version in pkgs:
pkg = render_package_name_template(pkg)
os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
+
finally:
os.close(ensure_file_handle)
+      # to_isolate holds the packages that we need to ensure from CIPD and
+      # then isolate. If it is empty, everything was successfully pulled from
+      # the isolate cache, so return from_isolate, the pinned packages that
+      # came from there.
+ if not to_isolate:
+ return from_isolate
+
cmd = [
self.binary_path, 'ensure',
'-root', site_root,
@@ -219,12 +383,16 @@ class CipdClient(object):
raise Error(
'Could not install packages; exit code %d\noutput:%s' % (
exit_code, '\n'.join(output)))
+
+ self._isolate_cipd(site_root, to_isolate, isolate_cache, cache_dir)
+
with open(json_file_path) as jfile:
result_json = json.load(jfile)
- return {
+ from_isolate.update({
subdir: [(x['package'], x['instance_id']) for x in pins]
for subdir, pins in result_json['result'].iteritems()
- }
+ })
+ return from_isolate
finally:
fs.remove(ensure_file_path)
fs.remove(json_file_path)