Chromium Code Reviews| Index: tools/isolate/tree_creator.py |
| diff --git a/tools/isolate/tree_creator.py b/tools/isolate/tree_creator.py |
| old mode 100644 |
| new mode 100755 |
| index ad0990cc63d6d7ec0c5705924e34db4d3ab7e8aa..44b19707974c50da6e519d6f4656f5106830dfe7 |
| --- a/tools/isolate/tree_creator.py |
| +++ b/tools/isolate/tree_creator.py |
| @@ -1,21 +1,25 @@ |
| +#!/usr/bin/env python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| -"""File related utility functions. |
| +"""Reads a manifest, creates a tree of hardlinks and runs the test. |
| -Creates a tree of hardlinks, symlinks or copy the inputs files. Calculate files |
| -hash. |
| +Keeps a local cache. |
| """ |
| import ctypes |
| -import hashlib |
| +import json |
| import logging |
| +import optparse |
| import os |
| +import re |
| import shutil |
| -import stat |
| +import subprocess |
| import sys |
| +import tempfile |
| import time |
| +import urllib |
| # Types of action accepted by recreate_tree(). |
| @@ -37,77 +41,20 @@ def os_link(source, link_name): |
| os.link(source, link_name) |
| -def expand_directories(indir, infiles, blacklist): |
| - """Expands the directories, applies the blacklist and verifies files exist.""" |
| - logging.debug('expand_directories(%s, %s, %s)' % (indir, infiles, blacklist)) |
| - outfiles = [] |
| - for relfile in infiles: |
| - if os.path.isabs(relfile): |
| - raise MappingError('Can\'t map absolute path %s' % relfile) |
| - infile = os.path.normpath(os.path.join(indir, relfile)) |
| - if not infile.startswith(indir): |
| - raise MappingError('Can\'t map file %s outside %s' % (infile, indir)) |
| - |
| - if relfile.endswith('/'): |
| - if not os.path.isdir(infile): |
| - raise MappingError( |
| - 'Input directory %s must have a trailing slash' % infile) |
| - for dirpath, dirnames, filenames in os.walk(infile): |
| - # Convert the absolute path to subdir + relative subdirectory. |
| - relpath = dirpath[len(indir)+1:] |
| - outfiles.extend(os.path.join(relpath, f) for f in filenames) |
| - for index, dirname in enumerate(dirnames): |
| - # Do not process blacklisted directories. |
| - if blacklist(os.path.join(relpath, dirname)): |
| - del dirnames[index] |
| - else: |
| - if not os.path.isfile(infile): |
| - raise MappingError('Input file %s doesn\'t exist' % infile) |
| - outfiles.append(relfile) |
| - return outfiles |
| - |
| - |
| -def process_inputs(indir, infiles, need_hash, read_only): |
| - """Returns a dictionary of input files, populated with the files' mode and |
| - hash. |
| - |
| - The file mode is manipulated if read_only is True. In practice, we only save |
| - one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). |
| - """ |
| - outdict = {} |
| - for infile in infiles: |
| - filepath = os.path.join(indir, infile) |
| - filemode = stat.S_IMODE(os.stat(filepath).st_mode) |
| - # Remove write access for non-owner. |
| - filemode &= ~(stat.S_IWGRP | stat.S_IWOTH) |
| - if read_only: |
| - filemode &= ~stat.S_IWUSR |
| - if filemode & stat.S_IXUSR: |
| - filemode |= (stat.S_IXGRP | stat.S_IXOTH) |
| - else: |
| - filemode &= ~(stat.S_IXGRP | stat.S_IXOTH) |
| - outdict[infile] = { |
| - 'mode': filemode, |
| - } |
| - if need_hash: |
| - h = hashlib.sha1() |
| - with open(filepath, 'rb') as f: |
| - h.update(f.read()) |
| - outdict[infile]['sha-1'] = h.hexdigest() |
| - return outdict |
| - |
| - |
| def link_file(outfile, infile, action): |
| """Links a file. The type of link depends on |action|.""" |
| logging.debug('Mapping %s to %s' % (infile, outfile)) |
| + if action not in (HARDLINK, SYMLINK, COPY): |
| + raise ValueError('Unknown mapping action %s' % action) |
| if os.path.isfile(outfile): |
| raise MappingError('%s already exist' % outfile) |
| if action == COPY: |
| shutil.copy(infile, outfile) |
| elif action == SYMLINK and sys.platform != 'win32': |
| + # On Windows, symlinks are converted to hardlinks and fail over to copy. |
| os.symlink(infile, outfile) |
| - elif action == HARDLINK: |
| + else: |
| try: |
| os_link(infile, outfile) |
| except OSError: |
| @@ -116,38 +63,6 @@ def link_file(outfile, infile, action): |
| 'Failed to hardlink, failing back to copy %s to %s' % ( |
| infile, outfile)) |
| shutil.copy(infile, outfile) |
| - else: |
| - raise ValueError('Unknown mapping action %s' % action) |
| - |
| - |
| -def recreate_tree(outdir, indir, infiles, action): |
| - """Creates a new tree with only the input files in it. |
| - |
| - Arguments: |
| - outdir: Output directory to create the files in. |
| - indir: Root directory the infiles are based in. |
| - infiles: List of files to map from |indir| to |outdir|. |
| - action: See assert below. |
| - """ |
| - logging.debug( |
| - 'recreate_tree(%s, %s, %s, %s)' % (outdir, indir, infiles, action)) |
| - logging.info('Mapping from %s to %s' % (indir, outdir)) |
| - |
| - assert action in (HARDLINK, SYMLINK, COPY) |
| - outdir = os.path.normpath(outdir) |
| - if not os.path.isdir(outdir): |
| - logging.info ('Creating %s' % outdir) |
| - os.makedirs(outdir) |
| - # Do not call abspath until the directory exists. |
| - outdir = os.path.abspath(outdir) |
| - |
| - for relfile in infiles: |
| - infile = os.path.join(indir, relfile) |
| - outfile = os.path.join(outdir, relfile) |
| - outsubdir = os.path.dirname(outfile) |
| - if not os.path.isdir(outsubdir): |
| - os.makedirs(outsubdir) |
| - link_file(outfile, infile, action) |
| def _set_write_bit(path, read_only): |
| @@ -177,6 +92,7 @@ def make_writable(root, read_only): |
| def rmtree(root): |
| """Wrapper around shutil.rmtree() to retry automatically on Windows.""" |
| + make_writable(root, False) |
| if sys.platform == 'win32': |
| for i in range(3): |
| try: |
| @@ -189,3 +105,174 @@ def rmtree(root): |
| time.sleep(delay) |
| else: |
| shutil.rmtree(root) |
| + |
| + |
def open_remote(file_or_url):
  """Opens a local file path or an http(s) url for reading.

  Returns a file-like object.
  """
  if not re.match(r'^https?://.+$', file_or_url):
    # Plain file on disk; open in binary mode to match the url code path.
    return open(file_or_url, 'rb')
  return urllib.urlopen(file_or_url)
| + |
| + |
def download_or_copy(file_or_url, dest):
  """Fetches an url, or copies a local file, into |dest|."""
  if not re.match(r'^https?://.+$', file_or_url):
    shutil.copy(file_or_url, dest)
  else:
    urllib.urlretrieve(file_or_url, dest)
| + |
| + |
def get_free_space(path):
  """Returns the number of free bytes on the partition containing |path|."""
  if sys.platform != 'win32':
    stats = os.statvfs(path)
    return stats.f_bfree * stats.f_frsize
  # Windows has no statvfs(); ask kernel32 directly.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
| + |
| + |
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  # Trim if the cache gets larger than this number of bytes. 0 disables the
  # check.
  MAX_SIZE = 20*1024*1024*1024
  # Trim if disk free space becomes lower than this number of bytes. 0
  # disables the check.
  MIN_FREE_SPACE = 1*1024*1024*1024
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote):
    """Loads the saved state, if any, and trims the cache.

    Arguments:
    - cache_dir: Directory holding the cached items; created if missing.
    - remote: Remote directory or url prefix the items are fetched from.

    0 is a legal value for MAX_SIZE and MIN_FREE_SPACE; it disables the
    corresponding trimming check.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The files are kept as an array in a LRU style. E.g. self.state[0] is the
    # oldest item.
    self.state = []

    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        # Use a context manager so the handle is closed even if parsing fails.
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except ValueError:
        # Too bad. The file will be overwritten and the cache cleared.
        pass
    self.trim()

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    for f in os.listdir(self.cache_dir):
      if f == self.STATE_FILE or f in self.state:
        continue
      logging.warn('Unknown file %s from cache' % f)
      # Insert as the oldest file. It will be deleted eventually if not
      # accessed.
      self.state.insert(0, f)

    # Ensure enough free space.
    while (
        self.MIN_FREE_SPACE and
        self.state and
        get_free_space(self.cache_dir) < self.MIN_FREE_SPACE):
      self._remove_lru_item()

    # Ensure maximum cache size. Compute the total size once and keep it
    # updated incrementally, instead of rescanning every cached file on each
    # eviction.
    if self.MAX_SIZE:
      total = self.total_size()
      while self.state and total > self.MAX_SIZE:
        total -= self._remove_lru_item()

    self.save()

  def _remove_lru_item(self):
    """Deletes the least recently used item and returns its size in bytes."""
    path = self.path(self.state.pop(0))
    size = os.stat(path).st_size
    os.remove(path)
    return size

  def retrieve(self, item):
    """Retrieves a file from the remote and adds it to the cache.

    Returns True if the item was fetched, False if it was already cached.
    """
    assert '/' not in item
    try:
      index = self.state.index(item)
      # Was already in cache. Update its LRU value by moving it to the end.
      self.state.pop(index)
      self.state.append(item)
      return False
    except ValueError:
      out = self.path(item)
      download_or_copy(os.path.join(self.remote, item), out)
      self.state.append(item)
      return True
    finally:
      self.save()

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def total_size(self):
    """Returns the sum of the sizes of all cached items, in bytes."""
    return sum(os.stat(self.path(f)).st_size for f in self.state)

  def save(self):
    """Saves the LRU ordering."""
    # Use a context manager so the state is flushed and the handle closed.
    with open(self.state_file, 'w') as f:
      json.dump(self.state, f)
| + |
| + |
def run_tha_test(manifest, cache_dir, remote):
  """Runs the test described by a manifest.

  Downloads the dependencies into the cache, hardlinks them into a temporary
  directory and runs the executable from there.
  """
  local_cache = Cache(cache_dir, remote)
  run_dir = tempfile.mkdtemp(prefix='run_tha_test')
  try:
    # Materialize every dependency listed in the manifest inside |run_dir|.
    for relpath, props in manifest['files'].iteritems():
      item = props['sha-1']
      local_cache.retrieve(item)
      dest = os.path.join(run_dir, relpath)
      dest_dir = os.path.dirname(dest)
      if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
      link_file(dest, local_cache.path(item), HARDLINK)
      os.chmod(dest, props['mode'])

    work_dir = os.path.join(run_dir, manifest['relative_cwd'])
    if not os.path.isdir(work_dir):
      os.makedirs(work_dir)
    if manifest.get('read_only'):
      make_writable(run_dir, True)
    command = manifest['command']
    logging.info('Running %s, cwd=%s' % (command, work_dir))
    return subprocess.call(command, cwd=work_dir)
  finally:
    local_cache.save()
    rmtree(run_dir)
| + |
| + |
def main():
  """Command line entry point: parses options, fetches the manifest and runs
  the test."""
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  parser.add_option(
      '-m', '--manifest', help='File/url describing what to map or run')
  parser.add_option('--no-run', action='store_true', help='Skip the run part')
  parser.add_option('--cache', default='cache', help='Cache directory')
  parser.add_option('-r', '--remote', help='Remote where to get the items')

  opts, positional = parser.parse_args()
  # -v maps to INFO, -vv (or more) to DEBUG.
  levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  logging.basicConfig(
      level=levels[min(2, opts.verbose)],
      format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')

  if not opts.manifest:
    parser.error('--manifest is required.')
  if not opts.remote:
    parser.error('--remote is required.')
  if positional:
    parser.error('Unsupported args %s' % ' '.join(positional))

  manifest = json.load(open_remote(opts.manifest))
  return run_tha_test(manifest, os.path.abspath(opts.cache), opts.remote)


if __name__ == '__main__':
  sys.exit(main())