Chromium Code Reviews

Unified Diff: client/cipd.py

Issue 2847153002: Cache/retrieve extracted CIPD packages in local isolate cache (Closed)
Patch Set: Refactor into isolateserver Created 3 years, 7 months ago
Index: client/cipd.py
diff --git a/client/cipd.py b/client/cipd.py
index 6fb16324368a20f75f121cf4ecdbe24c402ca925..da356fa555e1106eb177a8ae7a438ab1f45ffda9 100644
--- a/client/cipd.py
+++ b/client/cipd.py
@@ -11,6 +11,8 @@ import logging
import optparse
import os
import platform
+import re
+import shutil
import sys
import tempfile
import time
@@ -139,13 +141,131 @@ class CipdClient(object):
self.package_name = package_name
self.instance_id = instance_id
self.service_url = service_url
+ self._cache_hash = hashlib.sha1
+ self._cache_hash_algo = (
+ isolated_format.SUPPORTED_ALGOS_REVERSE[self._cache_hash])
+
+ def _ensure_from_isolate(self, target_dir, cipd_isolated, isolate_cache):
+ """
+ Retrieves the CIPD subdirectories from the isolate cache, if they exist,
M-A Ruel 2017/05/10 13:31:58 please start comment on the first line generally p
kjlubick 2017/05/10 19:41:00 Done.
+ hardlinking or copying the files into the provided directory.
+
+ Does the opposite of _isolate_cipd for a given cipd_isolated file.
+
+ Args:
+ target_dir: directory in which to put the files.
M-A Ruel 2017/05/10 13:31:58 inconsistent dots, remove
kjlubick 2017/05/10 19:41:00 Done.
+ cipd_isolated: the isolated.[hash] file created previously in
+ _isolate_cipd
+ isolate_cache: the isolateserver.DiskCache in which the files may be
+ stored.
+
+ Returns:
+ True if the isolated file and all the isolated contents were successfully
+ found in the isolate cache and put into place using hardlinks with a copy
+ fallback. False otherwise.
+ """
+ if not isolate_cache:
+      logging.info('Not ensuring cipd from isolate cache because '
+                   'isolate_cache is not defined: %s', isolate_cache)
+ return False
+ try:
+      with open(cipd_isolated, 'rb') as f:
+ digest = str(f.read())
M-A Ruel 2017/05/10 13:31:58 it's already an str.
kjlubick 2017/05/10 19:41:01 Done.
+ try:
+ content = isolate_cache.getfileobj(digest).read()
+ except Exception as e:
M-A Ruel 2017/05/10 13:31:58 that's too broad, please scope down
kjlubick 2017/05/10 19:41:00 Done.
+ logging.warning('Could not find isolated file in cache with digest '
+ '%s: %s', digest, e)
+ return False
+
+ ifile = isolated_format.IsolatedFile(digest, self._cache_hash)
+ ifile.load(content)
+
+ if not isolateserver.is_cached(ifile, isolate_cache):
+ logging.info('Cached CIPD asset(s) %s are incomplete', cipd_isolated)
+ return False
+
+ file_path.ensure_tree(target_dir)
+ return isolateserver.extract(ifile, target_dir, isolate_cache)
+ except Exception as e:
+      logging.warning('Could not ensure cipd package from isolate: %s', e)
+ return False
+
+ return True
+
+ def _isolate_cipd(self, root, subdirs, isolate_cache, cipd_cache):
+ """
+ Puts the content of the CIPD subdirectories into the isolate cache,
M-A Ruel 2017/05/10 13:31:58 single sentence empty line rest
kjlubick 2017/05/10 19:41:01 Done.
+ creating a .isolated file representing each subdirectory. This .isolated
+ file goes into the isolate_cache as well, and a .isolated.[hash] file
+    goes into cipd_cache for extraction in _ensure_from_isolate(). The suffix
+    will match self._cache_hash_algo (.sha-1 for SHA-1, etc.).
+
+    This allows machines with slow disk I/O (e.g. Raspberry Pis) to avoid
+    re-extracting the CIPD zips every time, potentially saving significant time.
+
+    Args:
+ root: where packages are installed.
+ subdirs: dict of subdir -> name_version where name_version is
+        [subdir].[pkg1version_pkg2version...].isolated.[hash]. This way, if
+ any packages are updated, the cached files will also change.
+      isolate_cache: An isolateserver.DiskCache used to store files locally.
+      cipd_cache: A directory in which to put the *.isolated.[hash] files.
+ """
+ if not isolate_cache or not os.path.isdir(cipd_cache):
+      logging.info('Not putting cipd into isolate cache because one of the '
+                   'caches is empty: %s, %s', isolate_cache, cipd_cache)
+ return
+ for subdir, hashfile in subdirs.iteritems():
+ if not subdir:
+ logging.info('not caching to %s because it extracts to ./', hashfile)
+ continue
+      # The subdirs given to us from cipd always use forward slashes
+      # (foo/bar/baz), which Windows does not accept as path separators.
+ subdir = subdir.replace('/', os.path.sep)
+ subdir = os.path.join(root, subdir)
+
+ if not os.path.isdir(subdir):
+ logging.warning('%r is not a directory, so it can\'t be isolated',
+ subdir)
+ continue
+
+ infiles, metadata = isolateserver.directory_to_metadata(
+ subdir, self._cache_hash, [], True)
+
+      # The .isolated file to be created on disk. hashfile is the name of the
+      # companion file that will be written alongside it,
+      # e.g. *.isolated.sha-1 when the SHA-1 algorithm is used.
+ isolated_file = unicode(os.path.join(cipd_cache, hashfile[:-5]))
+ data = {
+ 'algo': self._cache_hash_algo,
+ 'files': metadata,
+ 'version': isolated_format.ISOLATED_FILE_VERSION,
+ }
+ isolated_format.save_isolated(isolated_file, data)
+
+ for infile in infiles:
+        with open(os.path.join(subdir, infile.path), 'rb') as f:
+ isolate_cache.write(infile.digest, f)
+
+      with open(isolated_file, 'rb') as f:
+ content = f.read()
+ digest = self._cache_hash(content).hexdigest()
+ isolate_cache.write(digest, content)
+
+ with open(os.path.join(cipd_cache, hashfile), 'w') as f:
+ f.write(digest)
def ensure(
- self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None):
+ self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None,
+ isolate_cache=None):
"""Ensures that packages installed in |site_root| equals |packages| set.
Blocking call.
+    Attempts to use the isolate cache to store the unzipped CIPD files, keeping
+    a .isolated file in the cipd cache_dir.
+
Args:
site_root (str): where to install packages.
packages: dict of subdir -> list of (package_template, version) tuples.
@@ -153,6 +273,8 @@ class CipdClient(object):
Typically contains packages and tags.
tmp_dir (str): if not None, dir for temp files.
timeout (int): if not None, timeout in seconds for this function to run.
+ isolate_cache (isolateserver.DiskCache): if not None, CIPD assets will
+ be unzipped and stored in this disk cache and extracted from there.
Returns:
Pinned packages in the form of {subdir: [(package_name, package_id)]},
@@ -163,26 +285,47 @@ class CipdClient(object):
"""
timeoutfn = tools.sliding_timeout(timeout)
logging.info('Installing packages %r into %s', packages, site_root)
-
ensure_file_handle, ensure_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
json_out_file_handle, json_file_path = tempfile.mkstemp(
dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
os.close(json_out_file_handle)
-
+ to_isolate = {}
+ from_isolate = {}
try:
try:
for subdir, pkgs in sorted(packages.iteritems()):
if '\n' in subdir:
raise Error(
'Could not install packages; subdir %r contains newline' % subdir)
+
+          # Join all the versions together so that a new cached .isolated is
+          # used if any of them change.
+ versions = [p[1] for p in pkgs]
+ cipd_isolated = u'%s.%s.isolated.%s' % (subdir, '_'.join(versions),
+ self._cache_hash_algo)
+ cipd_isolated = os.path.join(cache_dir, cipd_isolated)
+ if (os.path.isfile(cipd_isolated) and
+ self._ensure_from_isolate(os.path.join(site_root, subdir),
+ cipd_isolated, isolate_cache)):
+ from_isolate[unicode(subdir)] = pkgs
+ continue
+ to_isolate[subdir] = cipd_isolated
os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,))
for pkg, version in pkgs:
pkg = render_package_name_template(pkg)
os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
+
finally:
os.close(ensure_file_handle)
+      # to_isolate holds the packages that still need to be ensured from CIPD
+      # and then isolated. If it is empty, everything was successfully pulled
+      # from the isolate cache, so just return from_isolate, the pinned
+      # packages that were recovered from it.
+ if not to_isolate:
+ return from_isolate
+
cmd = [
self.binary_path, 'ensure',
'-root', site_root,
@@ -219,12 +362,16 @@ class CipdClient(object):
raise Error(
'Could not install packages; exit code %d\noutput:%s' % (
exit_code, '\n'.join(output)))
+
+ self._isolate_cipd(site_root, to_isolate, isolate_cache, cache_dir)
+
with open(json_file_path) as jfile:
result_json = json.load(jfile)
- return {
+ from_isolate.update({
subdir: [(x['package'], x['instance_id']) for x in pins]
for subdir, pins in result_json['result'].iteritems()
- }
+ })
+ return from_isolate
finally:
fs.remove(ensure_file_path)
fs.remove(json_file_path)
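
For reviewers who want to exercise this locally, below is a minimal sketch of how the new isolate_cache argument could be wired into CipdClient.ensure(). It is not part of this CL: the CipdClient constructor keywords, the isolateserver.DiskCache and CachePolicies argument lists, and all paths/package names are assumptions or placeholders; check client/isolateserver.py and client/cipd.py for the authoritative APIs.

# Sketch only: DiskCache/CachePolicies argument lists below are assumptions.
import hashlib

import cipd
import isolateserver

client = cipd.CipdClient(
    '/path/to/cipd',                              # placeholder cipd binary
    package_name='infra/tools/cipd/linux-amd64',  # placeholder
    instance_id='0123456789abcdef0123456789abcdef01234567',
    service_url='https://chrome-infra-packages.appspot.com')

# Local content-addressed cache that will hold the unzipped package files
# plus the generated .isolated files.
isolate_cache = isolateserver.DiskCache(
    unicode('/path/to/isolated_cache'),
    isolateserver.CachePolicies(0, 0, 0),  # assumed "no limits" policy values
    hashlib.sha1)

pins = client.ensure(
    site_root='/path/to/site_root',
    packages={'tools': [('infra/tools/luci/vpython/${platform}', 'latest')]},
    cache_dir='/path/to/cipd_cache',  # *.isolated.sha-1 key files land here
    isolate_cache=isolate_cache)      # new in this CL; None keeps old behavior

# With the inputs above, the cache key file for the 'tools' subdir would be
# named tools.latest.isolated.sha-1 inside cache_dir, mirroring the
# '%s.%s.isolated.%s' % (subdir, '_'.join(versions), algo) logic in ensure().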