Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: client/cipd.py

Issue 2847153002: Cache/retrieve extracted CIPD packages in local isolate cache (Closed)
Patch Set: Cache cipd packages individually (for peak freshness) Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/swarming/server/bot_archive.py ('k') | client/isolated_format.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Fetches CIPD client and installs packages.""" 5 """Fetches CIPD client and installs packages."""
6 6
7 import contextlib 7 import contextlib
8 import hashlib 8 import hashlib
9 import json 9 import json
10 import logging 10 import logging
11 import optparse 11 import optparse
12 import os 12 import os
13 import platform 13 import platform
14 import re
15 import shutil
14 import sys 16 import sys
15 import tempfile 17 import tempfile
16 import time 18 import time
17 import urllib 19 import urllib
18 20
19 from utils import file_path 21 from utils import file_path
20 from utils import fs 22 from utils import fs
21 from utils import net 23 from utils import net
22 from utils import subprocess42 24 from utils import subprocess42
23 from utils import tools 25 from utils import tools
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
132 binary_path (str): path to the CIPD client binary. 134 binary_path (str): path to the CIPD client binary.
133 package_name (str): the CIPD package name for the client itself. 135 package_name (str): the CIPD package name for the client itself.
134 instance_id (str): the CIPD instance_id for the client itself. 136 instance_id (str): the CIPD instance_id for the client itself.
135 service_url (str): if not None, URL of the CIPD backend that overrides 137 service_url (str): if not None, URL of the CIPD backend that overrides
136 the default one. 138 the default one.
137 """ 139 """
138 self.binary_path = binary_path 140 self.binary_path = binary_path
139 self.package_name = package_name 141 self.package_name = package_name
140 self.instance_id = instance_id 142 self.instance_id = instance_id
141 self.service_url = service_url 143 self.service_url = service_url
144 self._cache_hash = hashlib.sha1
145 self._cache_hash_algo = (
146 isolated_format.SUPPORTED_ALGOS_REVERSE[self._cache_hash])
142 147
143 def ensure( 148 def ensure(
144 self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None): 149 self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None,
150 isolate_cache=None):
145 """Ensures that packages installed in |site_root| equals |packages| set. 151 """Ensures that packages installed in |site_root| equals |packages| set.
146 152
147 Blocking call. 153 Blocking call.
148 154
155 Attempts to use the isolate cache to store the unzipped cipd files, keeping
156 a .isolated file in the cipd cache_dir
157
149 Args: 158 Args:
150 site_root (str): where to install packages. 159 site_root (str): where to install packages.
151 packages: dict of subdir -> list of (package_template, version) tuples. 160 packages: dict of subdir -> list of (package_template, version) tuples.
152 cache_dir (str): if set, cache dir for cipd binary own cache. 161 cache_dir (str): if set, cache dir for cipd binary own cache.
153 Typically contains packages and tags. 162 Typically contains packages and tags.
154 tmp_dir (str): if not None, dir for temp files. 163 tmp_dir (str): if not None, dir for temp files.
155 timeout (int): if not None, timeout in seconds for this function to run. 164 timeout (int): if not None, timeout in seconds for this function to run.
165 isolate_cache (isolateserver.DiskCache): if not None, CIPD assets will
166 be unzipped and stored in this disk cache and extracted from there.
156 167
157 Returns: 168 Returns:
158 Pinned packages in the form of {subdir: [(package_name, package_id)]}, 169 Pinned packages in the form of {subdir: [(package_name, package_id)]},
159 which correspond 1:1 with the input packages argument. 170 which correspond 1:1 with the input packages argument.
160 171
161 Raises: 172 Raises:
162 Error if could not install packages or timed out. 173 Error if could not install packages or timed out.
163 """ 174 """
164 timeoutfn = tools.sliding_timeout(timeout) 175 timeoutfn = tools.sliding_timeout(timeout)
165 logging.info('Installing packages %r into %s', packages, site_root) 176 logging.info('Installing packages %r into %s', packages, site_root)
166
167 ensure_file_handle, ensure_file_path = tempfile.mkstemp( 177 ensure_file_handle, ensure_file_path = tempfile.mkstemp(
168 dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt') 178 dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
169 json_out_file_handle, json_file_path = tempfile.mkstemp( 179 json_out_file_handle, json_file_path = tempfile.mkstemp(
170 dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json') 180 dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
171 os.close(json_out_file_handle) 181 os.close(json_out_file_handle)
172 182 to_isolate = {}
183 from_isolate = {}
173 try: 184 try:
174 try: 185 try:
175 for subdir, pkgs in sorted(packages.iteritems()): 186 for subdir, pkgs in sorted(packages.iteritems()):
176 if '\n' in subdir: 187 if '\n' in subdir:
177 raise Error( 188 raise Error(
178 'Could not install packages; subdir %r contains newline' % subdir) 189 'Could not install packages; subdir %r contains newline' % subdir)
179 os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,)) 190
180 for pkg, version in pkgs: 191 for pkg, version in pkgs:
181 pkg = render_package_name_template(pkg) 192 pkg = render_package_name_template(pkg)
182 os.write(ensure_file_handle, '%s %s\n' % (pkg, version)) 193
194 cipd_isolated = u'%s.%s.isolated.%s' % (pkg, version,
195 self._cache_hash_algo)
196 cipd_isolated = os.path.join(cache_dir, cipd_isolated)
197 if (self._ensure_from_isolate(os.path.join(site_root, subdir),
198 cipd_isolated, isolate_cache)):
199 from_isolate.setdefault(subdir, []).append((pkg, version))
200 else:
201 # we will need to pull it from cipd
202 to_isolate[pkg] = cipd_isolated
203 os.write(ensure_file_handle, '@Subdir %s\n' % pkg)
M-A Ruel 2017/05/10 20:00:57 One problem with this code is that the cipd packag
kjlubick 2017/05/10 20:20:59 I'm not quite sure what you mean by "cipd package
204 os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
205
183 finally: 206 finally:
184 os.close(ensure_file_handle) 207 os.close(ensure_file_handle)
185 208
209 # to_isolate holds the packages that we need to ensure from CIPD and then
210 # isolate. If it is empty, nothing has to be fetched from CIPD because every
211 # package was successfully pulled from the isolate cache, so return
212 # from_isolate, the pinned packages that were pulled from that cache.
213 if not to_isolate:
214 return from_isolate
215
216 # call ensure once to put cipd packages in temp dir for caching
217 temp_root = tempfile.mkdtemp()
M-A Ruel 2017/05/10 20:00:57 prefix=u'cipd'; this directory is never deleted?
kjlubick 2017/05/10 20:20:59 Done.
186 cmd = [ 218 cmd = [
187 self.binary_path, 'ensure', 219 self.binary_path, 'ensure',
188 '-root', site_root, 220 '-root', temp_root,
189 '-ensure-file', ensure_file_path, 221 '-ensure-file', ensure_file_path,
190 '-verbose', # this is safe because cipd-ensure does not print a lot 222 '-verbose', # this is safe because cipd-ensure does not print a lot
191 '-json-output', json_file_path, 223 '-json-output', json_file_path,
192 ] 224 ]
193 if cache_dir: 225 if cache_dir:
194 cmd += ['-cache-dir', cache_dir] 226 cmd += ['-cache-dir', cache_dir]
195 if self.service_url: 227 if self.service_url:
196 cmd += ['-service-url', self.service_url] 228 cmd += ['-service-url', self.service_url]
197 229
198 logging.debug('Running %r', cmd) 230 logging.debug('Running %r', cmd)
(...skipping 13 matching lines...) Expand all
212 if pipe_name == 'stderr': 244 if pipe_name == 'stderr':
213 logging.debug('cipd client: %s', line) 245 logging.debug('cipd client: %s', line)
214 else: 246 else:
215 logging.info('cipd client: %s', line) 247 logging.info('cipd client: %s', line)
216 248
217 exit_code = process.wait(timeout=timeoutfn()) 249 exit_code = process.wait(timeout=timeoutfn())
218 if exit_code != 0: 250 if exit_code != 0:
219 raise Error( 251 raise Error(
220 'Could not install packages; exit code %d\noutput:%s' % ( 252 'Could not install packages; exit code %d\noutput:%s' % (
221 exit_code, '\n'.join(output))) 253 exit_code, '\n'.join(output)))
222 with open(json_file_path) as jfile: 254
223 result_json = json.load(jfile) 255 # isolate them
224 return { 256 self._isolate_cipd(temp_root, to_isolate, isolate_cache, cache_dir)
225 subdir: [(x['package'], x['instance_id']) for x in pins] 257
226 for subdir, pins in result_json['result'].iteritems() 258 # pull them out of isolate into their location
227 } 259 for subdir, pkgs in sorted(packages.iteritems()):
260
M-A Ruel 2017/05/10 20:00:57 remove empty line
kjlubick 2017/05/10 20:20:59 Done.
261 for pkg, version in pkgs:
262 pkg = render_package_name_template(pkg)
263 if pkg not in to_isolate:
264 # The package was already pulled from cache earlier.
265 continue
266
267 cipd_isolated = u'%s.%s.isolated.%s' % (pkg, version,
268 self._cache_hash_algo)
269 cipd_isolated = os.path.join(cache_dir, cipd_isolated)
270 if (self._ensure_from_isolate(os.path.join(site_root, subdir),
M-A Ruel 2017/05/10 20:00:57 wrapping () not necessary. I'd prefer to reverse
kjlubick 2017/05/10 20:20:59 Done.
271 cipd_isolated, isolate_cache)):
272 from_isolate.setdefault(subdir, []).append((pkg, version))
273 else:
274 raise Error('cipd package %s not able to be pulled from isolate '
275 'cache after being put there' % cipd_isolated)
276
277 return from_isolate
228 finally: 278 finally:
229 fs.remove(ensure_file_path) 279 fs.remove(ensure_file_path)
230 fs.remove(json_file_path) 280 fs.remove(json_file_path)
231 281
282 def _ensure_from_isolate(self, target_dir, cipd_isolated, isolate_cache):
283 """Retrieves the CIPD packages from the isolate cache, if they exist.
284
285 This hardlinks or copies the files into the provided directory. It
286 basically does the opposite of _isolate_cipd for a given cipd_isolated
287 file.
288
289 Args:
290 target_dir: directory in which to put the files
291 cipd_isolated: the isolated.[hash] file created previously in
292 _isolate_cipd
293 isolate_cache: the isolateserver.DiskCache in which the files may be
294 stored
295
296 Returns:
297 True if the isolated file and all the isolated contents were successfully
298 found in the isolate cache and put into place using hardlinks with a copy
299 fallback. False otherwise.
300 """
301 if not os.path.isfile(cipd_isolated):
302 logging.info('Not ensuring cipd from isolate cache cipd_isolated %s is '
303 'missing', cipd_isolated)
304 return False
305 if not isolate_cache:
306 logging.info('Not ensuring cipd from isolate cache isolate_cache is not'
307 'defined: %s', isolate_cache)
308 return False
309 try:
310 with open(cipd_isolated , 'rb') as f:
311 digest = f.read()
312 try:
313 content = isolate_cache.getfileobj(digest).read()
M-A Ruel 2017/05/10 20:00:56 with isolate_cache.getfileobj(digest) as f: cont
kjlubick 2017/05/10 20:20:59 Done.
314 except Exception as e:
M-A Ruel 2017/05/10 20:00:56 too broad, you want isolateserver.CacheMiss
kjlubick 2017/05/10 20:21:00 Done.
315 logging.warning('Could not find isolated file in cache with digest '
316 '%s: %s', digest, e)
317 return False
318
319 ifile = isolated_format.IsolatedFile(digest, self._cache_hash)
320 ifile.load(content)
321
322 if not isolateserver.is_cached(ifile, isolate_cache):
323 logging.info('Cached CIPD asset(s) %s are incomplete', cipd_isolated)
324 return False
325
326 file_path.ensure_tree(target_dir)
327 return isolateserver.extract(ifile, target_dir, isolate_cache)
328 except OSError as e:
M-A Ruel 2017/05/10 20:00:57 I'm wondering; it would apply to lines 310 and 311
kjlubick 2017/05/10 20:20:59 Good point. I think I had it for debugging asserts
329 logging.warning('Could not ensure cipd package from isolate %s', e)
330 return False
331
332 return True
333
334 def _isolate_cipd(self, root, pkgs, isolate_cache, cipd_cache):
335 """Puts the content of the CIPD subdirectories into the isolate cache.
336
337 This creates a .isolated file representing each subdirectory. This .isolated
338 file goes into the isolate_cache as well, and a .isolated.[hash] file
339 goes into cipd_cache for extraction in _ensure_from_isolate(). The suffix
340 will be related to self._cache_hash_algo (.sha-1 for SHA-1, etc)
341
342 This allows for disks with slow I/O (e.g. Raspberry Pis) to not have to
343 re-extract the CIPD zips every time, potentially saving a bunch of time.
344
345 It is assumed that the cipd packages have been extracted to root/name.
346
347 Args:
348 root: where packages are installed
349 pkgs: dict of name -> isolated_hash where isolated_hash is
350 [name].[version].isolated.[hash] This is the file to use as the
351 .isolated and its corresponding hash.
352 isolate_cache: A isolateserver.DiskCache used to store files locally
353 cipd_cache: A directory in which to put the *isolated.[hash] files
354 """
355 if not isolate_cache or not os.path.isdir(cipd_cache):
356 logging.info('Not putting cipd into isolate cache because one of the'
357 'caches is empty: %s, %s', isolate_cache, cipd_cache)
358 return
359 for pkg_dir, hashfile in pkgs.iteritems():
360 if not pkg_dir:
361 logging.info('not caching to %s because it extracts to ./', hashfile)
362 continue
363 # The pkgs given to us from cipd are always in foo/bar/baz format
364 # which freaks Windows out.
365 pkg_dir = pkg_dir.replace('/', os.path.sep)
366 pkg_dir = unicode(os.path.join(root, pkg_dir))
367
368 if not os.path.isdir(pkg_dir):
369 logging.warning('%r is not a directory, so it can\'t be isolated',
370 pkg_dir)
371 continue
372
373 infiles, metadata = isolateserver.directory_to_metadata(
374 pkg_dir, self._cache_hash, [], True)
375
376 # The .isolated file to be created on disk. hashfile represents the
377 # file that will also be created along with this.
378 # e.g. *.isolated.sha-1 if the sha1 algorithm is used
379 isolated_file = unicode(os.path.join(cipd_cache,
M-A Ruel 2017/05/10 20:00:57 inconsistent argument wrapping
kjlubick 2017/05/10 20:20:59 Done.
380 hashfile[:-1-len(self._cache_hash_algo)]))
381 data = {
382 'algo': self._cache_hash_algo,
383 'files': metadata,
384 'version': isolated_format.ISOLATED_FILE_VERSION,
385 }
386 # For packages like skia/bots/foo the isolated_file will end up in
387 # the skia/bots directory, which may not exist.
388 file_path.ensure_tree(os.path.dirname(isolated_file))
389 isolated_format.save_isolated(isolated_file, data)
390
391 for infile in infiles:
392 with open(os.path.join(pkg_dir, infile.path) , 'rb') as f:
393 isolate_cache.write(infile.digest, f)
394
395 with open(isolated_file , 'rb') as f:
396 content = f.read()
397 digest = self._cache_hash(content).hexdigest()
M-A Ruel 2017/05/10 20:00:57 this can be outside the closure
kjlubick 2017/05/10 20:20:59 Done.
398 isolate_cache.write(digest, content)
399
400 with open(os.path.join(cipd_cache, hashfile), 'w') as f:
M-A Ruel 2017/05/10 20:00:56 'wb'
kjlubick 2017/05/10 20:21:00 Done.
401 f.write(digest)
232 402
233 def get_platform(): 403 def get_platform():
234 """Returns ${platform} parameter value. 404 """Returns ${platform} parameter value.
235 405
236 Borrowed from 406 Borrowed from
237 https://chromium.googlesource.com/infra/infra/+/aaf9586/build/build.py#204 407 https://chromium.googlesource.com/infra/infra/+/aaf9586/build/build.py#204
238 """ 408 """
239 # linux, mac or windows. 409 # linux, mac or windows.
240 platform_variant = { 410 platform_variant = {
241 'darwin': 'mac', 411 'darwin': 'mac',
(...skipping 232 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 """ 644 """
475 result = [] 645 result = []
476 for pkg in packages: 646 for pkg in packages:
477 path, name, version = pkg.split(':', 2) 647 path, name, version = pkg.split(':', 2)
478 if not name: 648 if not name:
479 raise Error('Invalid package "%s": package name is not specified' % pkg) 649 raise Error('Invalid package "%s": package name is not specified' % pkg)
480 if not version: 650 if not version:
481 raise Error('Invalid package "%s": version is not specified' % pkg) 651 raise Error('Invalid package "%s": version is not specified' % pkg)
482 result.append((path, name, version)) 652 result.append((path, name, version))
483 return result 653 return result
OLDNEW
« no previous file with comments | « appengine/swarming/server/bot_archive.py ('k') | client/isolated_format.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698