Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(181)

Side by Side Diff: client/cipd.py

Issue 2847153002: Cache/retrieve extracted CIPD packages in local isolate cache (Closed)
Patch Set: clean up formatting Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/swarming/server/bot_archive.py ('k') | client/isolated_format.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Fetches CIPD client and installs packages.""" 5 """Fetches CIPD client and installs packages."""
6 6
7 import contextlib 7 import contextlib
8 import hashlib 8 import hashlib
9 import json 9 import json
10 import logging 10 import logging
11 import optparse 11 import optparse
12 import os 12 import os
13 import platform 13 import platform
14 import re
15 import shutil
14 import sys 16 import sys
15 import tempfile 17 import tempfile
16 import time 18 import time
17 import urllib 19 import urllib
18 20
19 from utils import file_path 21 from utils import file_path
20 from utils import fs 22 from utils import fs
21 from utils import net 23 from utils import net
22 from utils import subprocess42 24 from utils import subprocess42
23 from utils import tools 25 from utils import tools
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
132 binary_path (str): path to the CIPD client binary. 134 binary_path (str): path to the CIPD client binary.
133 package_name (str): the CIPD package name for the client itself. 135 package_name (str): the CIPD package name for the client itself.
134 instance_id (str): the CIPD instance_id for the client itself. 136 instance_id (str): the CIPD instance_id for the client itself.
135 service_url (str): if not None, URL of the CIPD backend that overrides 137 service_url (str): if not None, URL of the CIPD backend that overrides
136 the default one. 138 the default one.
137 """ 139 """
138 self.binary_path = binary_path 140 self.binary_path = binary_path
139 self.package_name = package_name 141 self.package_name = package_name
140 self.instance_id = instance_id 142 self.instance_id = instance_id
141 self.service_url = service_url 143 self.service_url = service_url
144 self._cache_hash = hashlib.sha1
145 self._cache_hash_algo = (
146 isolated_format.SUPPORTED_ALGOS_REVERSE[self._cache_hash])
147
148 def _ensure_from_isolate(self, target_dir, cipd_isolated, isolate_cache):
149 """
150 Retrieves the CIPD subdirectories from the isolate cache, if they exist,
151 hardlinking or copying the files into the provided directory.
152
153 Does the opposite of _isolate_cipd for a given cipd_isolated file.
154
155 Args:
156 target_dir: directory in which to put the files.
157 cipd_isolated: the isolated.[hash] file created previously in
158 _isolate_cipd
159 isolate_cache: the isolateserver.DiskCache in which the files may be
160 stored.
161
162 Returns:
163 True if the isolated file and all the isolated contents were successfully
164 found in the isolate cache and put into place using hardlinks with a copy
165 fallback. False otherwise.
166 """
167 if not isolate_cache:
168 logging.info('Not ensuring cipd from isolate cache isolate_cache is not'
169 'defined: %s', isolate_cache)
170 return False
171 try:
172 with open(cipd_isolated , 'r') as f:
M-A Ruel 2017/05/08 20:43:16 'rb' it makes it faster on windows.
kjlubick 2017/05/09 17:38:25 Done.
173 digest = str(f.read())
174 try:
175 content = isolate_cache.getfileobj(digest).read()
176 except Exception as e:
177 logging.warning('Could not find isolated file in cache with digest '
178 '%s: %s', digest, e)
179 return False
180
181 ifile = isolated_format.IsolatedFile(digest, self._cache_hash)
M-A Ruel 2017/05/08 20:43:16 This part should be a function not a method. I'm t
kjlubick 2017/05/09 17:38:25 Done.
182 ifile.load(content)
183
184 file_path.ensure_tree(target_dir)
185 files = ifile.data.get(u'files', {})
186 for f in files.keys():
M-A Ruel 2017/05/08 20:43:16 for f, props in files.iteritems():
kjlubick 2017/05/09 17:38:25 I can't - I get "Too many values to unpack" Comme
187 props = files.get(f, None)
188 if not props:
189 logging.warning('Problem getting info for %s', f)
190 return False
191 file_mode = props.get('m', None)
192 if file_mode:
193 # Ignore all bits apart from the user
194 file_mode &= 0700
195
196 dstpath = os.path.join(target_dir, f)
197 file_path.ensure_tree(os.path.dirname(dstpath))
198 digest = props.get('h', None)
199 if not digest:
200 logging.warning('Hash can\'t be empty %s', f)
201 return False
202 srcpath = isolate_cache.getfileobj(digest).name
203
204 file_path.link_file(unicode(dstpath), unicode(srcpath),
205 file_path.HARDLINK_WITH_FALLBACK)
206
207 if file_mode is not None:
208 fs.chmod(dstpath, file_mode)
209 except Exception as e:
210 logging.warning('Could not ensure cipd package from isolate %s', e)
211 return False
212
213 return True
214
def _isolate_cipd(self, root, subdirs, isolate_cache, cipd_cache):
  """Puts the content of the CIPD subdirectories into the isolate cache,
  creating a .isolated file representing each subdirectory.

  The .isolated file goes into the isolate_cache as well, and a
  .isolated.[hash] file goes into cipd_cache for extraction in
  _ensure_from_isolate(). The suffix is related to
  self._cache_hash_algo (.sha-1 for SHA-1, etc).

  This allows disks with slow I/O (e.g. Raspberry Pis) to not have to
  re-extract the CIPD zips every time, potentially saving a bunch of time.

  Args:
    root: where packages are installed.
    subdirs: dict of subdir -> name_version where name_version is
        [subdir].[pkg1version_pkg2version...].isolated.[hash]. This way,
        if any packages are updated, the cached files will also change.
    isolate_cache: an isolateserver.DiskCache used to store files locally.
    cipd_cache: a directory in which to put the *.isolated.[hash] files.
  """
  if not isolate_cache or not os.path.isdir(cipd_cache):
    logging.info(
        'Not putting cipd into isolate cache because one of the '
        'caches is empty: %s, %s', isolate_cache, cipd_cache)
    return
  for subdir, hashfile in subdirs.iteritems():
    if not subdir:
      logging.info('not caching to %s because it extracts to ./', hashfile)
      continue
    # The subdirs given to us from cipd are always in foo/bar/baz format,
    # which freaks Windows out.
    subdir = subdir.replace('/', os.path.sep)
    subdir = os.path.join(root, subdir)

    if not os.path.isdir(subdir):
      logging.warning('%r is not a directory, so it can\'t be isolated',
                      subdir)
      continue

    infiles, metadata = isolateserver.directory_to_metadata(
        subdir, self._cache_hash, [], True)

    # The .isolated file to be created on disk. hashfile is the same name
    # with the algo suffix appended, e.g. *.isolated.sha-1 when SHA-1 is
    # used. Strip the final extension regardless of the algo name length
    # (a fixed [:-5] slice would leave a trailing dot for 'sha-1').
    isolated_file = unicode(os.path.join(cipd_cache,
                                         hashfile.rsplit('.', 1)[0]))
    data = {
        'algo': self._cache_hash_algo,
        'files': metadata,
        'version': isolated_format.ISOLATED_FILE_VERSION,
    }
    isolated_format.save_isolated(isolated_file, data)

    for infile in infiles:
      # 'rb': package contents are binary; avoids text-mode translation
      # and is faster on Windows.
      with open(os.path.join(subdir, infile.path), 'rb') as f:
        isolate_cache.write(infile.digest, f)

    # Read the .isolated file once and hash the in-memory content, so the
    # file is not read a second time by a separate hash pass.
    with open(isolated_file, 'rb') as f:
      content = f.read()
    digest = self._cache_hash(content).hexdigest()
    isolate_cache.write(digest, content)

    # Record the digest so _ensure_from_isolate() can find the .isolated
    # file in the cache on the next run.
    with open(os.path.join(cipd_cache, hashfile), 'w') as f:
      f.write(digest)
142 277
143 def ensure( 278 def ensure(
144 self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None): 279 self, site_root, packages, cache_dir=None, tmp_dir=None, timeout=None,
280 isolate_cache=None):
145 """Ensures that packages installed in |site_root| equals |packages| set. 281 """Ensures that packages installed in |site_root| equals |packages| set.
146 282
147 Blocking call. 283 Blocking call.
148 284
285 Attempts to use the isolate cache to store the unzipped cipd files, keeping
286 a .isolated file in the cipd cache_dir
287
149 Args: 288 Args:
150 site_root (str): where to install packages. 289 site_root (str): where to install packages.
151 packages: dict of subdir -> list of (package_template, version) tuples. 290 packages: dict of subdir -> list of (package_template, version) tuples.
152 cache_dir (str): if set, cache dir for cipd binary own cache. 291 cache_dir (str): if set, cache dir for cipd binary own cache.
153 Typically contains packages and tags. 292 Typically contains packages and tags.
154 tmp_dir (str): if not None, dir for temp files. 293 tmp_dir (str): if not None, dir for temp files.
155 timeout (int): if not None, timeout in seconds for this function to run. 294 timeout (int): if not None, timeout in seconds for this function to run.
295 isolate_cache (isolateserver.DiskCache): if not None, CIPD assets will
296 be unzipped and stored in this disk cache and extracted from there.
156 297
157 Returns: 298 Returns:
158 Pinned packages in the form of {subdir: [(package_name, package_id)]}, 299 Pinned packages in the form of {subdir: [(package_name, package_id)]},
159 which correspond 1:1 with the input packages argument. 300 which correspond 1:1 with the input packages argument.
160 301
161 Raises: 302 Raises:
162 Error if could not install packages or timed out. 303 Error if could not install packages or timed out.
163 """ 304 """
164 timeoutfn = tools.sliding_timeout(timeout) 305 timeoutfn = tools.sliding_timeout(timeout)
165 logging.info('Installing packages %r into %s', packages, site_root) 306 logging.info('Installing packages %r into %s', packages, site_root)
166
167 ensure_file_handle, ensure_file_path = tempfile.mkstemp( 307 ensure_file_handle, ensure_file_path = tempfile.mkstemp(
168 dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt') 308 dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
169 json_out_file_handle, json_file_path = tempfile.mkstemp( 309 json_out_file_handle, json_file_path = tempfile.mkstemp(
170 dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json') 310 dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
171 os.close(json_out_file_handle) 311 os.close(json_out_file_handle)
172 312 if cache_dir:
313 file_path.ensure_tree(unicode(cache_dir))
M-A Ruel 2017/05/08 20:43:16 I'd prefer to update call sites instead.
kjlubick 2017/05/09 17:38:25 Done. Only run_isolated needed it.
314 to_isolate = {}
315 from_isolate = {}
173 try: 316 try:
174 try: 317 try:
175 for subdir, pkgs in sorted(packages.iteritems()): 318 for subdir, pkgs in sorted(packages.iteritems()):
176 if '\n' in subdir: 319 if '\n' in subdir:
177 raise Error( 320 raise Error(
178 'Could not install packages; subdir %r contains newline' % subdir) 321 'Could not install packages; subdir %r contains newline' % subdir)
322
323 # Join all the versions together so as to cause a new cached isolated
324 # to be used if any of them change.
325 versions = [p[1] for p in pkgs]
M-A Ruel 2017/05/08 20:43:16 why not each package be individually packed?
kjlubick 2017/05/09 17:38:25 I had tried that originally, but that gets tricky
M-A Ruel 2017/05/10 13:31:58 Hummm I still think it's not a great idea. Since t
326 cipd_isolated = '%s.%s.isolated.%s' % (subdir, '_'.join(versions),
M-A Ruel 2017/05/08 20:43:16 u
kjlubick 2017/05/09 17:38:25 Done.
327 self._cache_hash_algo)
328 cipd_isolated = os.path.join(cache_dir, cipd_isolated)
329 if (os.path.isfile(cipd_isolated) and
330 self._ensure_from_isolate(os.path.join(site_root, subdir),
331 cipd_isolated, isolate_cache)):
332 from_isolate[unicode(subdir)] = pkgs
333 continue
334 to_isolate[subdir] = cipd_isolated
179 os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,)) 335 os.write(ensure_file_handle, '@Subdir %s\n' % (subdir,))
180 for pkg, version in pkgs: 336 for pkg, version in pkgs:
181 pkg = render_package_name_template(pkg) 337 pkg = render_package_name_template(pkg)
182 os.write(ensure_file_handle, '%s %s\n' % (pkg, version)) 338 os.write(ensure_file_handle, '%s %s\n' % (pkg, version))
339
183 finally: 340 finally:
184 os.close(ensure_file_handle) 341 os.close(ensure_file_handle)
185 342
343 # to_isolate is the packages that we need to ensure from CIPD and then
344 # isolate. Thus, if this is empty, we don't need to get anything from
345 # CIPD because they were successfully pulled from isolate. Thus return
346 # from_isolate, the pinned packages that we pulled from_isolate
347 if not to_isolate:
348 return from_isolate
349
186 cmd = [ 350 cmd = [
187 self.binary_path, 'ensure', 351 self.binary_path, 'ensure',
188 '-root', site_root, 352 '-root', site_root,
189 '-ensure-file', ensure_file_path, 353 '-ensure-file', ensure_file_path,
190 '-verbose', # this is safe because cipd-ensure does not print a lot 354 '-verbose', # this is safe because cipd-ensure does not print a lot
191 '-json-output', json_file_path, 355 '-json-output', json_file_path,
192 ] 356 ]
193 if cache_dir: 357 if cache_dir:
194 cmd += ['-cache-dir', cache_dir] 358 cmd += ['-cache-dir', cache_dir]
195 if self.service_url: 359 if self.service_url:
(...skipping 16 matching lines...) Expand all
212 if pipe_name == 'stderr': 376 if pipe_name == 'stderr':
213 logging.debug('cipd client: %s', line) 377 logging.debug('cipd client: %s', line)
214 else: 378 else:
215 logging.info('cipd client: %s', line) 379 logging.info('cipd client: %s', line)
216 380
217 exit_code = process.wait(timeout=timeoutfn()) 381 exit_code = process.wait(timeout=timeoutfn())
218 if exit_code != 0: 382 if exit_code != 0:
219 raise Error( 383 raise Error(
220 'Could not install packages; exit code %d\noutput:%s' % ( 384 'Could not install packages; exit code %d\noutput:%s' % (
221 exit_code, '\n'.join(output))) 385 exit_code, '\n'.join(output)))
386
387 self._isolate_cipd(site_root, to_isolate, isolate_cache, cache_dir)
388
222 with open(json_file_path) as jfile: 389 with open(json_file_path) as jfile:
223 result_json = json.load(jfile) 390 result_json = json.load(jfile)
224 return { 391 from_isolate.update({
225 subdir: [(x['package'], x['instance_id']) for x in pins] 392 subdir: [(x['package'], x['instance_id']) for x in pins]
226 for subdir, pins in result_json['result'].iteritems() 393 for subdir, pins in result_json['result'].iteritems()
227 } 394 })
395 return from_isolate
228 finally: 396 finally:
229 fs.remove(ensure_file_path) 397 fs.remove(ensure_file_path)
230 fs.remove(json_file_path) 398 fs.remove(json_file_path)
231 399
232 400
233 def get_platform(): 401 def get_platform():
234 """Returns ${platform} parameter value. 402 """Returns ${platform} parameter value.
235 403
236 Borrowed from 404 Borrowed from
237 https://chromium.googlesource.com/infra/infra/+/aaf9586/build/build.py#204 405 https://chromium.googlesource.com/infra/infra/+/aaf9586/build/build.py#204
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 """ 642 """
475 result = [] 643 result = []
476 for pkg in packages: 644 for pkg in packages:
477 path, name, version = pkg.split(':', 2) 645 path, name, version = pkg.split(':', 2)
478 if not name: 646 if not name:
479 raise Error('Invalid package "%s": package name is not specified' % pkg) 647 raise Error('Invalid package "%s": package name is not specified' % pkg)
480 if not version: 648 if not version:
481 raise Error('Invalid package "%s": version is not specified' % pkg) 649 raise Error('Invalid package "%s": version is not specified' % pkg)
482 result.append((path, name, version)) 650 result.append((path, name, version))
483 return result 651 return result
OLDNEW
« no previous file with comments | « appengine/swarming/server/bot_archive.py ('k') | client/isolated_format.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698