isolate.py - Issue 11048019: Add everything from src/tools/isolate r159537.

Unified Diff: isolate.py

Issue 11048019: Add everything from src/tools/isolate r159537. (Closed) Base URL: https://git.chromium.org/chromium/tools/swarm_client.git@master

Patch Set: Ensure --similarity is sticky Created 8 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: isolate.py

diff --git a/isolate.py b/isolate.py

new file mode 100755

index 0000000000000000000000000000000000000000..8985fbd45f449c2f2112a0c78bce33d30ba957aa

--- /dev/null

+++ b/isolate.py

@@ -0,0 +1,1926 @@

+#!/usr/bin/env python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Front end tool to manage .isolate files and corresponding tests.

+Run ./isolate.py --help for more detailed information.

+See more information at

+http://dev.chromium.org/developers/testing/isolated-testing

+"""

+import binascii

+import copy

+import hashlib

+import logging

+import optparse

+import os

+import posixpath

+import re

+import stat

+import subprocess

+import sys

+import time

+import urllib

+import urllib2

+import run_swarm_step

+import trace_inputs

+# Import here directly so isolate is easier to use as a library.

+from run_swarm_step import get_flavor

+# Used by process_input().

+NO_INFO, STATS_ONLY, WITH_HASH = range(56, 59)

+SHA_1_NULL = hashlib.sha1().hexdigest()

+PATH_VARIABLES = ('DEPTH', 'PRODUCT_DIR')

+DEFAULT_OSES = ('linux', 'mac', 'win')

+# Files that should be 0-length when mapped.

+KEY_TOUCHED = 'isolate_dependency_touched'

+# Files that should be tracked by the build tool.

+KEY_TRACKED = 'isolate_dependency_tracked'

+# Files that should not be tracked by the build tool.

+KEY_UNTRACKED = 'isolate_dependency_untracked'

+_GIT_PATH = os.path.sep + '.git'

+_SVN_PATH = os.path.sep + '.svn'

+# The maximum number of upload attempts to try when uploading a single file.

+MAX_UPLOAD_ATTEMPTS = 5

+# The minimum size of files to upload directly to the blobstore.

+MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 8

class ExecutionError(Exception):
  """A generic error occurred.

  str() of the exception is its first constructor argument.
  """

  def __str__(self):
    # An exception built without arguments has an empty args tuple; return an
    # empty string instead of raising IndexError while formatting the error.
    if not self.args:
      return ''
    return self.args[0]

+### Path handling code.

def relpath(path, root):
  """Returns |path| relative to |root|, preserving a trailing os.path.sep.

  os.path.relpath() drops the trailing separator; it is appended back when
  |path| ended with one.
  """
  suffix = os.path.sep if path.endswith(os.path.sep) else ''
  return os.path.relpath(path, root) + suffix

def normpath(path):
  """Normalizes |path| like os.path.normpath(), preserving a trailing
  os.path.sep.
  """
  suffix = os.path.sep if path.endswith(os.path.sep) else ''
  return os.path.normpath(path) + suffix

def posix_relpath(path, root):
  """Returns posixpath.relpath(path, root), preserving a trailing slash."""
  suffix = '/' if path.endswith('/') else ''
  return posixpath.relpath(path, root) + suffix

def cleanup_path(x):
  """Cleans up a relative path and converts os.path.sep to '/'.

  Trailing separators are stripped first. A path that ends up empty or equal
  to '.' becomes ''; any other path is returned with exactly one trailing '/'.
  A falsy input is returned unchanged.
  """
  if not x:
    return x
  cleaned = x.rstrip(os.path.sep).replace(os.path.sep, '/')
  if cleaned in ('', '.'):
    return ''
  return cleaned + '/'

def default_blacklist(f):
  """Returns True for unimportant files that are normally ignored.

  That covers compiled python files, .run_test_cases files, testserver.log
  and anything that is, or lives under, a .git or .svn directory.
  """
  if f.endswith(('.pyc', '.run_test_cases', 'testserver.log')):
    return True
  if _GIT_PATH in f or _SVN_PATH in f:
    return True
  return f in ('.git', '.svn')

def expand_directory_and_symlink(indir, relfile, blacklist):
  """Expands a single input into the list of relative files it represents.

  The function recurses when |relfile| is a directory (it must then end with
  os.path.sep) or when a component of |relfile| is a symlink.

  Arguments:
    indir: Root directory; every mapped file must live under it.
    relfile: Path relative to |indir|, using the native path separator.
    blacklist: Callable returning True for directory entries to skip.
               Explicitly listed individual files are never filtered.

  Returns:
    A list of paths relative to |indir|.

  Raises:
    run_swarm_step.MappingError if the path is absolute, escapes |indir|,
    is a recursive symlink, does not exist or is a directory without a
    trailing separator.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo
  """
  if os.path.isabs(relfile):
    raise run_swarm_step.MappingError(
        'Can\'t map absolute path %s' % relfile)

  infile = normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise run_swarm_step.MappingError(
        'Can\'t map file %s outside %s' % (infile, indir))

  if sys.platform != 'win32':
    # Look if any item in relfile is a symlink.
    base, symlink, rest = trace_inputs.split_at_symlink(indir, relfile)
    if symlink:
      # Append everything pointed by the symlink. If the symlink is recursive,
      # this code blows up.
      symlink_relfile = os.path.join(base, symlink)
      symlink_path = os.path.join(indir, symlink_relfile)
      link_target = os.readlink(symlink_path)
      dest_infile = normpath(
          os.path.join(os.path.dirname(symlink_path), link_target))
      if rest:
        dest_infile = trace_inputs.safe_join(dest_infile, rest)
      if not dest_infile.startswith(indir):
        raise run_swarm_step.MappingError(
            'Can\'t map symlink reference %s (from %s) ->%s outside of %s' %
            (symlink_relfile, relfile, dest_infile, indir))
      if infile.startswith(dest_infile):
        raise run_swarm_step.MappingError(
            'Can\'t map recursive symlink reference %s->%s' %
            (symlink_relfile, dest_infile))
      # Strip |indir| and the separator following it.
      dest_relfile = dest_infile[len(indir)+1:]
      logging.info('Found symlink: %s -> %s' % (symlink_relfile, dest_relfile))
      expanded = expand_directory_and_symlink(indir, dest_relfile, blacklist)
      # Add the symlink itself.
      expanded.append(symlink_relfile)
      return expanded

  if not relfile.endswith(os.path.sep):
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise run_swarm_step.MappingError(
          'Input directory %s must have a trailing slash' % infile)
    if not os.path.isfile(infile):
      raise run_swarm_step.MappingError(
          'Input file %s doesn\'t exist' % infile)
    return [relfile]

  if not os.path.isdir(infile):
    raise run_swarm_step.MappingError(
        '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

  outfiles = []
  for entry in os.listdir(infile):
    inner_relfile = os.path.join(relfile, entry)
    if blacklist(inner_relfile):
      continue
    if os.path.isdir(os.path.join(indir, inner_relfile)):
      # Directories are identified by their trailing separator.
      inner_relfile += os.path.sep
    outfiles.extend(
        expand_directory_and_symlink(indir, inner_relfile, blacklist))
  return outfiles

def expand_directories_and_symlinks(indir, infiles, blacklist):
  """Expands the directories and the symlinks, applies the blacklist and
  verifies files exist.

  Files are specified in os native path separator. The per-input results of
  expand_directory_and_symlink() are concatenated in input order.
  """
  expanded = []
  for relfile in infiles:
    expanded += expand_directory_and_symlink(indir, relfile, blacklist)
  return expanded

def recreate_tree(outdir, indir, infiles, action, as_sha1):
  """Creates a new tree with only the input files in it.

  Arguments:
    outdir: Output directory to create the files in. Created if missing.
    indir: Root directory the infiles are based in.
    infiles: dict of files to map from |indir| to |outdir|, keyed by relative
             path with a metadata dict as value.
    action: One of run_swarm_step.HARDLINK, SYMLINK or COPY.
    as_sha1: Output filename is the sha1 instead of relfile. Symlinks are
             skipped in this mode.
  """
  logging.info(
      'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_sha1=%s)' %
      (outdir, indir, len(infiles), action, as_sha1))

  valid_actions = (
      run_swarm_step.HARDLINK,
      run_swarm_step.SYMLINK,
      run_swarm_step.COPY)
  assert action in valid_actions

  outdir = os.path.normpath(outdir)
  if not os.path.isdir(outdir):
    logging.info('Creating %s' % outdir)
    os.makedirs(outdir)
  # Do not call abspath until the directory exists.
  outdir = os.path.abspath(outdir)

  for relfile, metadata in infiles.iteritems():
    source = os.path.join(indir, relfile)
    is_link = 'link' in metadata
    if as_sha1:
      # Do the hashtable specific checks.
      if is_link:
        # Skip links when storing a hashtable.
        continue
      destination = os.path.join(outdir, metadata['sha-1'])
      if os.path.isfile(destination):
        # Just do a quick check that the file size matches. No need to stat()
        # again the input file, grab the value from the dict.
        if metadata['size'] == os.stat(destination).st_size:
          continue
        logging.warn('Overwritting %s' % metadata['sha-1'])
        os.remove(destination)
    else:
      destination = os.path.join(outdir, relfile)
      parent = os.path.dirname(destination)
      if not os.path.isdir(parent):
        os.makedirs(parent)

    # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
    # if metadata.get('touched_only') == True:
    #   open(outfile, 'ab').close()
    if is_link:
      pointed = metadata['link']
      logging.debug('Symlink: %s -> %s' % (destination, pointed))
      os.symlink(pointed, destination)
    else:
      run_swarm_step.link_file(destination, source, action)

def encode_multipart_formdata(fields, files,
                              mime_mapper=lambda _: 'application/octet-stream'):
  """Encodes a Multipart form data object.

  Args:
    fields: a sequence (name, value) elements for
      regular form fields.
    files: a sequence of (name, filename, value) elements for data to be
      uploaded as files.
    mime_mapper: function to return the mime type from the filename.

  Returns:
    A (content_type, body) tuple:
    content_type: for httplib.HTTP instance
    body: for httplib.HTTP instance
  """
  # hashlib only accepts bytes on python 3; encoding the ASCII timestamp is a
  # no-op on python 2 and keeps the function usable on both.
  boundary = hashlib.md5(str(time.time()).encode('utf-8')).hexdigest()
  delimiter = '--' + boundary

  body_list = []
  for key, value in fields:
    body_list.extend([
        delimiter,
        'Content-Disposition: form-data; name="%s"' % key,
        '',
        value,
        delimiter,
        '',
    ])

  for key, filename, value in files:
    body_list.extend([
        delimiter,
        'Content-Disposition: form-data; name="%s"; '
        'filename="%s"' % (key, filename),
        'Content-Type: %s' % mime_mapper(filename),
        '',
        value,
        delimiter,
        '',
    ])

  if body_list:
    # The last delimiter (second to last item) closes the whole message.
    body_list[-2] += '--'

  body = '\r\n'.join(body_list)
  content_type = 'multipart/form-data; boundary=%s' % boundary
  return content_type, body

def upload_hash_content(url, params=None, payload=None,
                        content_type='application/octet-stream'):
  """Uploads the given hash contents.

  Arguments:
    url: The url to upload the hash contents to.
    params: The params to include with the upload; url-encoded into the query
            string when not empty.
    payload: The data to upload.
    content_type: The content_type of the data being uploaded.

  Returns:
    The object returned by urllib2.urlopen() for the request.
  """
  target = url
  if params:
    target = url + '?' + urllib.urlencode(params)

  request = urllib2.Request(target, data=payload)
  headers = (
      ('Content-Type', content_type),
      # A missing payload is reported as a zero-length body.
      ('Content-Length', len(payload or '')),
  )
  for header, value in headers:
    request.add_header(header, value)
  return urllib2.urlopen(request)

def upload_hash_content_to_blobstore(generate_upload_url, params,
                                     hash_data):
  """Uploads the given hash contents directly to the blobstore via a generated
  url.

  Arguments:
    generate_upload_url: The url to get the new upload url from.
    params: The params to include with the upload.
    hash_data: The contents to upload.

  Returns:
    The result of upload_hash_content(), or None when no upload url could be
    generated.
  """
  upload_files = [('hash_contents', 'hash_contest', hash_data)]
  content_type, body = encode_multipart_formdata(params.items(), upload_files)

  logging.debug('Generating url to directly upload file to blobstore')
  upload_url = urllib2.urlopen(generate_upload_url).read()
  if upload_url:
    return upload_hash_content(upload_url, payload=body,
                               content_type=content_type)

  logging.error('Unable to generate upload url')
  return

class UploadRemote(run_swarm_step.Remote):
  """Remote whose file handler uploads hash contents to a server."""

  @staticmethod
  def get_file_handler(base_url):
    """Returns a function(hash_data, hash_key) uploading to |base_url|."""
    def upload_file(hash_data, hash_key):
      params = {'hash_key': hash_key}
      server = base_url.rstrip('/')
      if len(hash_data) <= MIN_SIZE_FOR_DIRECT_BLOBSTORE:
        # Small enough to go through the regular store handler.
        upload_hash_content(server + '/content/store', params, hash_data)
      else:
        upload_hash_content_to_blobstore(
            server + '/content/generate_blobstore_url', params, hash_data)
    return upload_file

def url_open(url, data=None, max_retries=MAX_UPLOAD_ATTEMPTS):
  """Opens the given url with the given data, repeating up to max_retries
  times if it encounters an error.

  Arguments:
    url: The url to open.
    data: The data to send to the url.
    max_retries: The maximum number of times to try connecting to the url.

  Returns:
    The response from the url, or it raises an exception if it failed to get
    a response.

  Raises:
    run_swarm_step.MappingError if no attempt succeeded.
  """
  # Must be bound before the loop so that a total failure (or max_retries <= 0)
  # reaches the MappingError below instead of an UnboundLocalError.
  response = None
  for _ in range(max_retries):
    try:
      response = urllib2.urlopen(url, data=data)
      # Stop at the first success; do not send the request again.
      break
    except urllib2.URLError as e:
      logging.warning('Unable to connect to %s, error msg: %s', url, e)
      time.sleep(1)

  # If we get no response from the server after max_retries, assume it
  # is down and raise an exception
  if response is None:
    raise run_swarm_step.MappingError('Unable to connect to server, %s, '
                                      'to see which files are presents' %
                                      url)
  return response

def update_files_to_upload(query_url, queries, files_to_upload):
  """Queries the server to see which files from this batch already exist there.

  The request body is the concatenation of the binary sha-1 of every query.
  The response is read as one character per query; chr(0) means the server
  does not have the file.

  Arguments:
    query_url: The url to send the query to.
    queries: The (relfile, metadata) items to potentially upload to the server.
    files_to_upload: Any new files that need to be upload are added to
                     this list.

  Raises:
    run_swarm_step.MappingError if the response length doesn't match the
    number of queries.
  """
  digests = [binascii.unhexlify(metadata['sha-1']) for _, metadata in queries]
  response = url_open(query_url, data=''.join(digests)).read()
  if len(queries) != len(response):
    raise run_swarm_step.MappingError(
        'Got an incorrect number of responses from the server. Expected %d, '
        'but got %d' % (len(queries), len(response)))

  absent = chr(0)
  for query, status in zip(queries, response):
    if status == absent:
      files_to_upload.append(query)
    else:
      logging.debug('Hash for %s already exists on the server, no need '
                    'to upload again', query[0])

def upload_sha1_tree(base_url, indir, infiles):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url; |base_url|/content/contains is queried to learn
              which elements are already present, and the uploads go through
              UploadRemote.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload, keyed by path relative to |indir| with a
             metadata dict as value. Entries with a 'link' key are skipped.

  Raises:
    run_swarm_step.MappingError if any upload failed.
  """
  logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
               (base_url, indir, len(infiles)))

  # Generate the list of files that need to be uploaded (since some may already
  # be on the server.
  base_url = base_url.rstrip('/')
  contains_hash_url = base_url + '/content/contains'
  batch_size = 1000
  # Skip links when uploading.
  candidates = [
      (relfile, metadata) for relfile, metadata in infiles.iteritems()
      if 'link' not in metadata
  ]
  to_upload = []
  for start in range(0, len(candidates), batch_size):
    update_files_to_upload(
        contains_hash_url, candidates[start:start + batch_size], to_upload)

  # Upload the required files.
  remote_uploader = UploadRemote(base_url)
  for relfile, metadata in to_upload:
    # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
    # if metadata.get('touched_only') == True:
    #   hash_data = ''
    with open(os.path.join(indir, relfile), 'rb') as f:
      hash_data = f.read()
    remote_uploader.add_item(run_swarm_step.Remote.MED,
                             hash_data,
                             metadata['sha-1'])
  remote_uploader.join()

  exception = remote_uploader.next_exception()
  if not exception:
    return
  # Log every pending failure before reporting the overall error.
  while exception:
    logging.error('Error uploading file to server:\n%s', exception[1])
    exception = remote_uploader.next_exception()
  raise run_swarm_step.MappingError(
      'Encountered errors uploading hash contents to server. See logs for '
      'exact failures')

def process_input(filepath, prevdict, level, read_only):
  """Processes an input file, a dependency, and return meta data about it.

  Arguments:
  - filepath: File to act on.
  - prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash.
  - level: determines the amount of information retrieved.
  - read_only: If True, the user write bit is also removed from the saved
               mode. Group write and all 'others' bits are always removed and
               the group execute bit mirrors the user one. On windows, mode is
               not set at all.

  Behaviors:
  - NO_INFO retrieves no information.
  - STATS_ONLY retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link.
  - WITH_HASH retrieves all of STATS_ONLY plus the sha-1 of the content of the
    file.

  Raises:
    run_swarm_step.MappingError if the file is missing.
  """
  assert level in (NO_INFO, STATS_ONLY, WITH_HASH)
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('touched_only') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   if get_flavor() != 'win':
  #     out['mode'] = stat.S_IRUSR | stat.S_IRGRP
  #   out['size'] = 0
  #   out['sha-1'] = SHA_1_NULL
  #   out['touched_only'] = True
  #   return out

  if level < STATS_ONLY:
    # NO_INFO: nothing to gather. WITH_HASH is above STATS_ONLY so hashing
    # never applies here either.
    return out

  try:
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise run_swarm_step.MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if get_flavor() != 'win':
    # Ignore file mode on Windows since it's not really useful there.
    # Remove write access for group and all access to 'others'.
    mode = stat.S_IMODE(filestats.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      mode &= ~stat.S_IWUSR
    if mode & stat.S_IXUSR:
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    out['mode'] = mode

  if not is_link:
    out['size'] = filestats.st_size

  # Used to skip recalculating the hash. Use the most recent update time.
  out['timestamp'] = int(round(filestats.st_mtime))
  # If the timestamp wasn't updated, carry on the previous hash and link
  # destination.
  if prevdict.get('timestamp') == out['timestamp']:
    for cached in ('sha-1', 'link'):
      if cached in prevdict:
        out[cached] = prevdict[cached]

  if is_link and 'link' not in out:
    # A symlink, store the link destination.
    out['link'] = os.readlink(filepath)

  needs_hash = (
      level >= WITH_HASH and not out.get('sha-1') and not out.get('link'))
  if needs_hash and not is_link:
    with open(filepath, 'rb') as f:
      out['sha-1'] = hashlib.sha1(f.read()).hexdigest()
  return out

+### Variable stuff.

def result_to_state(filename):
  """Replaces the file's extension with '.state'.

  Everything after the last '.' is dropped; a name without any '.' simply gets
  '.state' appended.
  """
  head, dot, _ = filename.rpartition('.')
  base = head if dot else filename
  return base + '.state'

def determine_root_dir(relative_root, infiles):
  """For a list of infiles, determines the deepest root directory that is
  referenced indirectly.

  All arguments must be using os.path.sep.

  Each leading '..' component of an input file moves one directory up from
  |relative_root|; the shortest resulting directory is returned.
  """
  parent_prefix = '..' + os.path.sep
  deepest_root = relative_root
  for infile in infiles:
    # The trick used to determine the root directory is to look at "how far"
    # back up it is looking up.
    candidate = relative_root
    remainder = infile
    while remainder.startswith(parent_prefix):
      remainder = remainder[3:]
      assert not remainder.startswith(os.path.sep)
      candidate = os.path.dirname(candidate)
    if deepest_root.startswith(candidate):
      deepest_root = candidate
  logging.debug(
      'determine_root_dir(%s, %d files) -> %s' % (
          relative_root, len(infiles), deepest_root))
  return deepest_root

def replace_variable(part, variables):
  """Returns the value of |part| if it is a '<(NAME)' variable reference.

  Arguments:
    part: A string, either a '<(NAME)' reference or arbitrary text.
    variables: dict mapping variable names to their value.

  Returns:
    variables[NAME] when |part| starts with a '<(NAME)' reference, otherwise
    |part| unchanged.

  Raises:
    ExecutionError if the referenced variable is not in |variables|.
  """
  # The parentheses are literal and must be escaped; NAME is captured.
  m = re.match(r'<\(([A-Z_]+)\)', part)
  if not m:
    return part
  name = m.group(1)
  if name not in variables:
    raise ExecutionError(
        'Variable "%s" was not found in %s.\nDid you forget to specify '
        '--variable?' % (name, variables))
  return variables[name]

def process_variables(variables, relative_base_dir):
  """Processes path variables as a special case and returns a copy of the dict.

  For each variable listed in PATH_VARIABLES that is present: normalizes it,
  verifies it is an existing directory, converts it to an absolute path, then
  stores it relative to relative_base_dir. The input dict is not modified.

  Raises:
    ExecutionError if a path variable is not a directory.
  """
  processed = variables.copy()
  for name in PATH_VARIABLES:
    if name not in processed:
      continue
    path = os.path.normpath(processed[name])
    if not os.path.isdir(path):
      raise ExecutionError('%s=%s is not a directory' % (name, path))
    # Variables could contain / or \ on windows. Always normalize to
    # os.path.sep.
    absolute = os.path.abspath(path.replace('/', os.path.sep))
    # All variables are relative to the .isolate file.
    processed[name] = os.path.relpath(absolute, relative_base_dir)
  return processed

def eval_variables(item, variables):
  """Replaces the .isolate variables in a string item.

  Every '<(NAME)' reference found in |item| is substituted with
  variables[NAME].

  Note that the .isolate format is a subset of the .gyp dialect.

  Raises:
    ExecutionError if a referenced variable is not in |variables|.
  """
  def lookup(match):
    name = match.group(1)
    if name not in variables:
      raise ExecutionError(
          'Variable "%s" was not found in %s.\nDid you forget to specify '
          '--variable?' % (name, variables))
    return variables[name]

  # The parentheses of '<(NAME)' are literal and must be escaped.
  return re.sub(r'<\(([A-Z_]+)\)', lookup, item)

def classify_files(root_dir, tracked, untracked):
  """Converts the list of files into a .isolate 'variables' dictionary.

  Arguments:
  - root_dir: directory the file names are relative to; used to detect
              symlinks.
  - tracked: list of files names to generate a dictionary out of that should
             probably be tracked.
  - untracked: list of files names that must not be tracked.

  Returns:
    dict with the sorted file lists under KEY_TRACKED and KEY_UNTRACKED; a key
    is omitted when its list is empty.
  """
  # These directories are not guaranteed to be always present on every builder.
  OPTIONAL_DIRECTORIES = (
    'test/data/plugin',
    'third_party/WebKit/LayoutTests',
  )

  def should_be_tracked(filepath):
    """Returns True if it is a file without whitespace in a non-optional
    directory that has no symlink in its path.
    """
    if filepath.endswith('/') or ' ' in filepath:
      return False
    if any(optional in filepath for optional in OPTIONAL_DIRECTORIES):
      return False
    # Look if any element in the path is a symlink.
    parts = filepath.split('/')
    return not any(
        os.path.islink(os.path.join(root_dir, '/'.join(parts[:depth])))
        for depth in range(1, len(parts) + 1))

  new_tracked = []
  new_untracked = list(untracked)
  for filepath in sorted(tracked):
    # Anything that can't be tracked is demoted to untracked.
    target = new_tracked if should_be_tracked(filepath) else new_untracked
    target.append(filepath)

  variables = {}
  for key, files in ((KEY_TRACKED, new_tracked), (KEY_UNTRACKED, new_untracked)):
    if files:
      variables[key] = sorted(files)
  return variables

def generate_simplified(
    tracked, untracked, touched, root_dir, variables, relative_cwd):
  """Generates a clean and complete .isolate 'variables' dictionary.

  Cleans up and extracts only files from within root_dir then processes
  variables and relative_cwd.

  Arguments:
    tracked, untracked, touched: iterables of objects exposing a |path|
        attribute. tracked and untracked are first compressed through
        trace_inputs.extract_directories().
    root_dir: root directory of the files.
    variables: dict of variables; only the PATH_VARIABLES entries and
        EXECUTABLE_SUFFIX are used.
    relative_cwd: directory the paths are rebased on.

  Returns:
    The dict from classify_files(), plus KEY_TOUCHED when there are touched
    files.
  """
  logging.info(
      'generate_simplified(%d files, %s, %s, %s)' %
      (len(tracked) + len(untracked) + len(touched),
        root_dir, variables, relative_cwd))
  # Constants.
  # Skip log in PRODUCT_DIR. Note that these are applied on '/' style path
  # separator. The parentheses of '<(PRODUCT_DIR)' are literal and must be
  # escaped, otherwise the patterns can never match the paths fix() produces.
  LOG_FILE = re.compile(r'^\<\(PRODUCT_DIR\)\/[^\/]+\.log$')
  EXECUTABLE = re.compile(
      r'^(\<\(PRODUCT_DIR\)\/[^\/\.]+)' +
      re.escape(variables.get('EXECUTABLE_SUFFIX', '')) +
      r'$')

  # Preparation work.
  relative_cwd = cleanup_path(relative_cwd)
  # Creates the right set of variables here. We only care about PATH_VARIABLES.
  variables = dict(
      ('<(%s)' % k, variables[k].replace(os.path.sep, '/'))
      for k in PATH_VARIABLES if k in variables)

  # Actual work: Process the files.
  # TODO(maruel): if all the files in a directory are in part tracked and in
  # part untracked, the directory will not be extracted. Tracked files should be
  # 'promoted' to be untracked as needed.
  tracked = trace_inputs.extract_directories(
      root_dir, tracked, default_blacklist)
  untracked = trace_inputs.extract_directories(
      root_dir, untracked, default_blacklist)
  # touched is not compressed, otherwise it would result in files to be archived
  # that we don't need.

  def fix(f):
    """Bases the file on the most restrictive variable.

    Returns None for files that must be ignored.
    """
    logging.debug('fix(%s)' % f)
    # Important, GYP stores the files with / and not \.
    f = f.replace(os.path.sep, '/')
    # If it's not already a variable.
    if not f.startswith('<'):
      # relative_cwd is usually the directory containing the gyp file. It may be
      # empty if the whole directory containing the gyp file is needed.
      f = posix_relpath(f, relative_cwd) or './'

    for variable, root_path in variables.iteritems():
      if f.startswith(root_path):
        f = variable + f[len(root_path):]
        break

    # Now strips off known files we want to ignore and to any specific mangling
    # as necessary. It's easier to do it here than generate a blacklist.
    match = EXECUTABLE.match(f)
    if match:
      return match.group(1) + '<(EXECUTABLE_SUFFIX)'

    # Blacklist logs and 'First Run' in the PRODUCT_DIR. First Run is not
    # created by the compile, but by the test itself.
    if LOG_FILE.match(f) or f == '<(PRODUCT_DIR)/First Run':
      return None

    if sys.platform == 'darwin':
      # On OSX, the name of the output is dependent on gyp define, it can be
      # 'Google Chrome.app' or 'Chromium.app', same for 'XXX
      # Framework.framework'. Furthermore, they are versioned with a gyp
      # variable. To lower the complexity of the .isolate file, remove all the
      # individual entries that show up under any of the 4 entries and replace
      # them with the directory itself. Overall, this results in a bit more
      # files than strictly necessary.
      OSX_BUNDLES = (
        '<(PRODUCT_DIR)/Chromium Framework.framework/',
        '<(PRODUCT_DIR)/Chromium.app/',
        '<(PRODUCT_DIR)/Google Chrome Framework.framework/',
        '<(PRODUCT_DIR)/Google Chrome.app/',
      )
      for prefix in OSX_BUNDLES:
        if f.startswith(prefix):
          # Note this result in duplicate values, so the a set() must be used to
          # remove duplicates.
          return prefix
    return f

  tracked = set(filter(None, (fix(f.path) for f in tracked)))
  untracked = set(filter(None, (fix(f.path) for f in untracked)))
  touched = set(filter(None, (fix(f.path) for f in touched)))
  out = classify_files(root_dir, tracked, untracked)
  if touched:
    out[KEY_TOUCHED] = sorted(touched)
  return out

def generate_isolate(
    tracked, untracked, touched, root_dir, variables, relative_cwd):
  """Generates a clean and complete .isolate file.

  The variables computed by generate_simplified() are wrapped in a single
  condition for the current OS flavor.
  """
  simplified = generate_simplified(
      tracked, untracked, touched, root_dir, variables, relative_cwd)
  condition = ['OS=="%s"' % get_flavor(), {'variables': simplified}]
  return {'conditions': [condition]}

def split_touched(files):
  """Splits files that are touched vs files that are read.

  Returns:
    A (tracked, touched) tuple of lists; items with a falsy |size| are
    considered touched only.
  """
  tracked, touched = [], []
  for item in files:
    bucket = tracked if item.size else touched
    bucket.append(item)
  return tracked, touched

def pretty_print(variables, stdout):
  """Outputs a gyp compatible list from the decoded variables.

  Similar to pprint.print() but with NIH syndrome.

  Arguments:
    variables: dict made of nested dict, list, string and True/False/None.
    stdout: file-like object the output is written to.
  """
  # Order the dictionary keys by these keys in priority.
  ORDER = (
      'variables', 'condition', 'command', 'relative_cwd', 'read_only',
      KEY_TRACKED, KEY_UNTRACKED)

  # NOTE(review): the per-level indentation literal shows as a single space in
  # this view of the file; confirm it was not collapsed from a wider one.
  STEP = ' '

  def sorting_key(x):
    """Gives priority to 'most important' keys before the others."""
    return str(ORDER.index(x)) if x in ORDER else x

  def escaped(text):
    """Escapes backslashes and single quotes."""
    return text.replace('\\', '\\\\').replace('\'', '\\\'')

  def loop_list(indent, items):
    for item in items:
      if isinstance(item, basestring):
        stdout.write('%s\'%s\',\n' % (indent, item))
      elif isinstance(item, dict):
        stdout.write('%s{\n' % indent)
        loop_dict(indent + STEP, item)
        stdout.write('%s},\n' % indent)
      elif isinstance(item, list):
        # A list inside a list will write the first item embedded.
        stdout.write('%s[' % indent)
        last = len(item) - 1
        for index, entry in enumerate(item):
          if isinstance(entry, basestring):
            stdout.write('\'%s\', ' % escaped(entry))
          elif isinstance(entry, dict):
            stdout.write('{\n')
            loop_dict(indent + STEP, entry)
            separator = '' if index == last else ', '
            stdout.write('%s}%s' % (indent, separator))
          else:
            assert False
        stdout.write('],\n')
      else:
        assert False

  def loop_dict(indent, items):
    for key in sorted(items, key=sorting_key):
      item = items[key]
      stdout.write("%s'%s': " % (indent, key))
      if isinstance(item, dict):
        stdout.write('{\n')
        loop_dict(indent + STEP, item)
        stdout.write(indent + '},\n')
      elif isinstance(item, list):
        stdout.write('[\n')
        loop_list(indent + STEP, item)
        stdout.write(indent + '],\n')
      elif isinstance(item, basestring):
        stdout.write('\'%s\',\n' % escaped(item))
      elif item in (True, False, None):
        stdout.write('%s\n' % item)
      else:
        assert False, item

  stdout.write('{\n')
  loop_dict(STEP, variables)
  stdout.write('}\n')

def union(lhs, rhs):
  """Merges two compatible datastructures composed of dict/list/set.

  A None operand yields a deep copy of the other one. Objects exposing a
  union() method are merged through it, dicts are merged key by key
  recursively and lists are concatenated.
  """
  assert lhs is not None or rhs is not None
  if lhs is None or rhs is None:
    return copy.deepcopy(rhs if lhs is None else lhs)

  assert type(lhs) == type(rhs), (lhs, rhs)
  if hasattr(lhs, 'union'):
    # Includes set, OSSettings and Configs.
    return lhs.union(rhs)
  if isinstance(lhs, dict):
    merged = {}
    for key in set(lhs).union(rhs):
      merged[key] = union(lhs.get(key), rhs.get(key))
    return merged
  if isinstance(lhs, list):
    # Do not go inside the list.
    return lhs + rhs
  assert False, type(lhs)

def extract_comment(content):
  """Extracts file level comment.

  Returns the leading run of lines starting with '#', line endings included.
  """
  lines = content.splitlines(True)
  end = 0
  while end < len(lines) and lines[end].startswith('#'):
    end += 1
  return ''.join(lines[:end])

def eval_content(content):
  """Evaluates a python file and return the value defined in it.

  Used in practice for .isolate files.
  """
  # NOTE(review): eval() is used on the file content. Builtins are disabled
  # but this is not a sandbox; only feed it trusted .isolate files.
  restricted_globals = {'__builtins__': None}
  scratch_locals = {}
  result = eval(content, restricted_globals, scratch_locals)
  # The expression must not have defined or altered anything.
  assert scratch_locals == {}, scratch_locals
  assert restricted_globals == {'__builtins__': None}, restricted_globals
  return result

def verify_variables(variables):
  """Verifies the |variables| dictionary is in the expected format.

  Only the known keys are accepted. 'read_only' must be True, False or None;
  every other value must be a list of strings.
  """
  VALID_VARIABLES = [
    KEY_TOUCHED,
    KEY_TRACKED,
    KEY_UNTRACKED,
    'command',
    'read_only',
  ]
  assert isinstance(variables, dict), variables
  assert set(VALID_VARIABLES).issuperset(set(variables)), variables.keys()
  for name, value in variables.iteritems():
    if name == 'read_only':
      assert value in (True, False, None), value
      continue
    assert isinstance(value, list), value
    assert all(isinstance(i, basestring) for i in value), value

def verify_condition(condition):
  """Verifies the |condition| dictionary is in the expected format.

  A condition is a list of 2 or 3 items: an 'OS=="name"' expression followed
  by one or two dicts that may only contain a 'variables' entry.
  """
  VALID_INSIDE_CONDITION = ['variables']
  assert isinstance(condition, list), condition
  assert 2 <= len(condition) <= 3, condition
  expression = condition[0]
  assert re.match(r'OS==\"([a-z]+)\"', expression), expression
  for branch in condition[1:]:
    assert isinstance(branch, dict), branch
    assert set(VALID_INSIDE_CONDITION).issuperset(set(branch)), branch.keys()
    verify_variables(branch.get('variables', {}))

def verify_root(value):
  """Verifies the top level .isolate dictionary is in the expected format.

  Only 'variables' and 'conditions' are accepted at the root; both are
  verified in turn.
  """
  VALID_ROOTS = ['variables', 'conditions']
  assert isinstance(value, dict), value
  assert set(VALID_ROOTS).issuperset(set(value)), value.keys()
  verify_variables(value.get('variables', {}))

  conditions = value.get('conditions', [])
  assert isinstance(conditions, list), conditions
  for entry in conditions:
    verify_condition(entry)

def remove_weak_dependencies(values, key, item, item_oses):
  """Remove any oses from this key if the item is already under a strong key.

  Only KEY_TOUCHED is considered weak. |item_oses| is updated in place and
  returned.
  """
  if key != KEY_TOUCHED:
    return item_oses
  for stronger_key in (KEY_TRACKED, KEY_UNTRACKED):
    stronger_oses = values.get(stronger_key, {}).get(item, None)
    if stronger_oses:
      # In-place difference: the caller's set is modified.
      item_oses -= stronger_oses
  return item_oses

def invert_map(variables):
  """Converts a dict(OS, dict(deptype, list(dependencies)) to a flattened view.

  Returns a tuple of:
    1. dict(deptype, dict(dependency, set(OSes)) for easier processing.
    2. All the OSes found as a set.
  """
  FILE_KEYS = (KEY_TOUCHED, KEY_TRACKED, KEY_UNTRACKED)
  KEYS = FILE_KEYS + ('command', 'read_only')

  out = dict((key, {}) for key in KEYS)
  for os_name, values in variables.iteritems():
    for key in FILE_KEYS:
      per_item = out[key]
      for item in values.get(key, []):
        per_item.setdefault(item, set()).add(os_name)

    # command needs special handling: the whole command line is one item.
    command = tuple(values.get('command', []))
    out['command'].setdefault(command, set()).add(os_name)

    # read_only needs special handling: the value itself (possibly None) is
    # the item.
    read_only = values.get('read_only')
    out['read_only'].setdefault(read_only, set()).add(os_name)
  return out, set(variables)

def reduce_inputs(values, oses):
  """Reduces the invert_map() output to the strictest minimum list.

  1. Construct the inverse map first.
  2. Look at each individual file and directory, map where they are used and
     reconstruct the inverse dictionary.
  3. Do not convert back to negative if only 2 OSes were merged.

  Returns a tuple of:
    1. the minimized dictionary
    2. oses passed through as-is.
  """
  KEYS = (
    KEY_TOUCHED,
    KEY_TRACKED,
    KEY_UNTRACKED,
    'command',
    'read_only',
  )
  out = dict((key, {}) for key in KEYS)
  assert all(oses), oses
  # Negative conditions are only worth it with more than 2 OSes.
  use_negation = len(oses) > 2
  for key in KEYS:
    for item, item_oses in values.get(key, {}).iteritems():
      item_oses = remove_weak_dependencies(values, key, item, item_oses)
      if not item_oses:
        continue

      if not use_negation:
        assert None not in item_oses, item_oses
        out[key][item] = set(item_oses)
        continue

      # Converts all oses.difference('foo') to '!foo'.
      assert all(item_oses), item_oses
      missing = oses.difference(item_oses)
      if len(missing) == 1:
        # Replace it with a negative.
        reduced = set(['!' + tuple(missing)[0]])
      elif not missing:
        # Used on every OS: unconditional.
        reduced = set([None])
      else:
        reduced = set(item_oses)
      out[key][item] = reduced
  return out, oses

def convert_map_to_isolate_dict(values, oses):
  """Regenerates back a .isolate configuration dict from files and dirs
  mappings generated from reduce_inputs().

  |oses| is accepted so the tuple returned by reduce_inputs() can be passed
  as-is; it is not read.
  """
  # First pass: group by condition. Each entry of |per_condition| is a
  # [if_dict, else_dict] pair, keyed by the OS name or None when unconditional.
  per_condition = {}
  for key in values:
    for item, item_oses in values[key].iteritems():
      if item is None:
        # For read_only default.
        continue
      for cond_os in item_oses:
        negative = cond_os is not None and cond_os.startswith('!')
        os_name = None if cond_os is None else cond_os.lstrip('!')
        branches = per_condition.setdefault(os_name, [{}, {}])
        # A negative condition goes in the 'else' dict, at index 1.
        variables = branches[int(negative)].setdefault('variables', {})
        if item in (True, False):
          # One-off for read_only.
          variables[key] = item
        elif isinstance(item, tuple) and item:
          # One-off for command. Do not merge lists and do not sort!
          assert key not in variables
          variables[key] = list(item)
        elif item:
          # A file or a directory: append it and keep the list sorted.
          dependencies = variables.setdefault(key, [])
          dependencies.append(item)
          dependencies.sort()

  # Second pass: generate the .isolate dictionary itself.
  out = {}
  for os_name in sorted(per_condition):
    branches = per_condition[os_name]
    if os_name is None:
      # Unconditional values cannot have an 'else' clause.
      assert not branches[1]
      out = union(out, branches[0])
      continue
    conditions = out.setdefault('conditions', [])
    # Only emit the 'else' dict when it holds something.
    kept = branches if branches[1] else branches[0:1]
    conditions.append(['OS=="%s"' % os_name] + kept)
  return out

+### Internal state files.

class OSSettings(object):
  """Represents the dependencies for an OS. The structure is immutable.

  It's the .isolate settings for a specific file.
  """
  def __init__(self, name, values):
    """Stores |values|, a 'variables' dict, after checking it with
    verify_variables(). The three dependency lists are kept sorted.
    """
    self.name = name
    verify_variables(values)
    self.touched = sorted(values.get(KEY_TOUCHED, []))
    self.tracked = sorted(values.get(KEY_TRACKED, []))
    self.untracked = sorted(values.get(KEY_UNTRACKED, []))
    # Slice to get a copy so the caller's command is not shared.
    self.command = values.get('command', [])[:]
    self.read_only = values.get('read_only')

  def union(self, rhs):
    """Returns a new OSSettings merging self and |rhs|.

    Both must be for the same OS and at most one of them can define a command.
    self's read_only is used unless it is None.
    """
    assert self.name == rhs.name
    assert not self.command or not rhs.command
    if self.read_only is not None:
      read_only = self.read_only
    else:
      read_only = rhs.read_only
    merged = {
      KEY_TOUCHED: sorted(self.touched + rhs.touched),
      KEY_TRACKED: sorted(self.tracked + rhs.tracked),
      KEY_UNTRACKED: sorted(self.untracked + rhs.untracked),
      'command': self.command or rhs.command,
      'read_only': read_only,
    }
    return OSSettings(self.name, merged)

  def flatten(self):
    """Returns the settings as a 'variables' dict, skipping the empty values.

    read_only is only skipped when it is None; False is kept.
    """
    flat = {}
    if self.command:
      flat['command'] = self.command
    if self.touched:
      flat[KEY_TOUCHED] = self.touched
    if self.tracked:
      flat[KEY_TRACKED] = self.tracked
    if self.untracked:
      flat[KEY_UNTRACKED] = self.untracked
    if self.read_only is not None:
      flat['read_only'] = self.read_only
    return flat

class Configs(object):
  """Represents a processed .isolate file.

  Stores the file in a processed way, split by each the OS-specific
  configurations.

  The self.per_os[None] member contains all the 'else' clauses plus the default
  values. It is not included in the flatten() result.
  """
  def __init__(self, oses, file_comment):
    """Creates empty settings for the None pseudo-OS and each of |oses|."""
    self.file_comment = file_comment
    self.per_os = {None: OSSettings(None, {})}
    for name in oses:
      self.per_os[name] = OSSettings(name, {})

  def union(self, rhs):
    """Returns a new Configs covering the OSes of both self and |rhs|."""
    all_oses = list(set(self.per_os).union(rhs.per_os))
    # Takes the first file comment, prefering lhs.
    merged = Configs(all_oses, self.file_comment or rhs.file_comment)
    for os_name in all_oses:
      # Calls the module-level union(), not this method.
      merged.per_os[os_name] = union(
          self.per_os.get(os_name), rhs.per_os.get(os_name))
    return merged

  def add_globals(self, values):
    """Merges |values| into every OS, the None pseudo-OS included."""
    for os_name in self.per_os:
      self.add_values(os_name, values)

  def add_values(self, for_os, values):
    """Merges |values| into the settings of |for_os| only."""
    current = self.per_os[for_os]
    self.per_os[for_os] = current.union(OSSettings(for_os, values))

  def add_negative_values(self, for_os, values):
    """Includes the variables to all OSes except |for_os|.

    This includes 'None' so unknown OSes gets it too.
    """
    for os_name in self.per_os:
      if os_name == for_os:
        continue
      self.add_values(os_name, values)

  def flatten(self):
    """Returns a flat dictionary representation of the configuration.

    Skips None pseudo-OS.
    """
    flat = {}
    for os_name, settings in self.per_os.iteritems():
      if os_name is not None:
        flat[os_name] = settings.flatten()
    return flat

def load_isolate_as_config(value, file_comment, default_oses):
  """Parses one .isolate file and returns a Configs() instance.

  |value| is the loaded dictionary that was defined in the gyp file.

  The expected format is strict, anything diverting from the format below will
  throw an assert:
  {
    'variables': {
      'command': [
        ...
      ],
      'isolate_dependency_tracked': [
        ...
      ],
      'isolate_dependency_untracked': [
        ...
      ],
      'read_only': False,
    },
    'conditions': [
      ['OS=="<os>"', {
        'variables': {
          ...
        },
      }, {  # else
        'variables': {
          ...
        },
      }],
      ...
    ],
  }
  """
  verify_root(value)

  def os_of(condition):
    """Extracts <os> out of the 'OS=="<os>"' expression of a condition."""
    return re.match(r'OS==\"([a-z]+)\"', condition[0]).group(1)

  # Scan to get the list of OSes.
  conditions = value.get('conditions', [])
  named_conditions = [(os_of(c), c) for c in conditions]
  oses = set(name for name, _ in named_conditions).union(default_oses)
  configs = Configs(oses, file_comment)

  # Global level variables.
  configs.add_globals(value.get('variables', {}))

  # OS specific variables.
  for os_name, condition in named_conditions:
    configs.add_values(os_name, condition[1].get('variables', {}))
    if len(condition) > 2:
      # The optional third item is the 'else' clause.
      configs.add_negative_values(os_name, condition[2].get('variables', {}))
  return configs

def load_isolate_for_flavor(content, flavor):
  """Loads the .isolate file and returns the information unprocessed.

  Returns a tuple (command, dependencies, touched, read_only) for |flavor|,
  falling back on the None pseudo-OS. The dependencies and the touched files
  are fixed to use os.path.sep.
  """
  # Load the .isolate file and process its conditions.
  configs = load_isolate_as_config(eval_content(content), None, DEFAULT_OSES)
  config = configs.per_os.get(flavor)
  if not config:
    config = configs.per_os.get(None)
  if not config:
    raise ExecutionError('Failed to load configuration for \'%s\'' % flavor)

  def to_native(paths):
    """Converts the posix style separators to the native one."""
    return [path.replace('/', os.path.sep) for path in paths]

  # Merge tracked and untracked dependencies, isolate.py doesn't care about the
  # trackability of the dependencies, only the build tool does.
  dependencies = to_native(config.tracked + config.untracked)
  touched = to_native(config.touched)
  return config.command, dependencies, touched, config.read_only

class Flattenable(object):
  """Represents data that can be represented as a json file."""
  # Names of the attributes that are saved and loaded; set by each subclass.
  MEMBERS = ()

  def flatten(self):
    """Returns a json-serializable version of itself.

    Skips None entries.
    """
    flat = {}
    for member in self.MEMBERS:
      value = getattr(self, member)
      if value is not None:
        flat[member] = value
    return flat

  @classmethod
  def load(cls, data):
    """Loads a flattened version.

    Raises ValueError if |data| has a key that is not listed in MEMBERS. |data|
    itself is left untouched.
    """
    remaining = data.copy()
    instance = cls()
    for member in instance.MEMBERS:
      if member not in remaining:
        continue
      # Access to a protected member XXX of a client class
      # pylint: disable=W0212
      instance._load_member(member, remaining.pop(member))
    if remaining:
      raise ValueError(
          'Found unexpected entry %s while constructing an object %s' %
            (remaining, cls.__name__), remaining, cls.__name__)
    return instance

  def _load_member(self, member, value):
    """Loads a member into self."""
    setattr(self, member, value)

  @classmethod
  def load_file(cls, filename):
    """Loads the data from a file or return an empty instance."""
    instance = cls()
    try:
      instance = cls.load(trace_inputs.read_json(filename))
    except (IOError, ValueError):
      # Best effort, the empty instance is returned.
      logging.warning('Failed to load %s' % filename)
    else:
      logging.debug('Loaded %s(%s)' % (cls.__name__, filename))
    return instance

class Result(Flattenable):
  """Describes the content of a .result file.

  This file is used by run_swarm_step.py so its content is strictly only
  what is necessary to run the test outside of a checkout.

  It is important to note that the 'files' dict keys are using native OS path
  separator instead of '/' used in .isolate file.
  """
  MEMBERS = (
    'command',
    'files',
    'os',
    'read_only',
    'relative_cwd',
  )

  # Value of get_flavor() when the class is defined. It is saved by flatten()
  # and compared against the loaded value in _load_member().
  os = get_flavor()

  def __init__(self):
    super(Result, self).__init__()
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None

  def update(self, command, infiles, touched, read_only, relative_cwd):
    """Updates the result state with new information."""
    self.command = command
    # Add new files, keeping the metadata of the ones already known.
    for infile in infiles:
      self.files.setdefault(infile, {})
    for infile in touched:
      self.files.setdefault(infile, {})['touched_only'] = True
    # Prune extraneous files that are not a dependency anymore.
    wanted = set(infiles).union(touched)
    for stale in [f for f in self.files if f not in wanted]:
      del self.files[stale]
    # A None read_only keeps the value previously known.
    if read_only is not None:
      self.read_only = read_only
    self.relative_cwd = relative_cwd

  def _load_member(self, member, value):
    """Loads a member into self; 'os' is only verified, never overwritten."""
    if member != 'os':
      super(Result, self)._load_member(member, value)
      return
    if value != self.os:
      raise run_swarm_step.ConfigError(
          'The .results file was created on another platform')

  def __str__(self):
    return ''.join((
        '%s(\n' % self.__class__.__name__,
        ' command: %s\n' % self.command,
        ' files: %d\n' % len(self.files),
        ' read_only: %s\n' % self.read_only,
        ' relative_cwd: %s)' % self.relative_cwd,
    ))

class SavedState(Flattenable):
  """Describes the content of a .state file.

  The items in this file are simply to improve the developer's life and aren't
  used by run_swarm_step.py. This file can always be safely removed.

  isolate_file permits to find back root_dir, variables are used for stateful
  rerun.
  """
  MEMBERS = (
    'isolate_file',
    'variables',
  )

  def __init__(self):
    super(SavedState, self).__init__()
    self.isolate_file = None
    self.variables = {}

  def update(self, isolate_file, variables):
    """Updates the saved state with new information.

    |variables| is merged into the variables already known.
    """
    self.isolate_file = isolate_file
    self.variables.update(variables)

  @classmethod
  def load(cls, data):
    """Loads a flattened version and fixes the case of isolate_file."""
    state = super(SavedState, cls).load(data)
    if state.isolate_file:
      state.isolate_file = trace_inputs.get_native_path_case(
          state.isolate_file)
    return state

  def __str__(self):
    variables = ''.join(
        '\n %s=%s' % (name, self.variables[name])
        for name in sorted(self.variables))
    return ''.join((
        '%s(\n' % self.__class__.__name__,
        ' isolate_file: %s\n' % self.isolate_file,
        ' variables: %s' % variables,
        ')',
    ))

class CompleteState(object):
  """Contains all the state to run the task at hand."""
  def __init__(self, result_file, result, saved_state):
    super(CompleteState, self).__init__()
    self.result_file = result_file
    # Contains the data that will be used by run_swarm_step.py
    self.result = result
    # Contains the data to ease developer's use-case but that is not strictly
    # necessary.
    self.saved_state = saved_state

  @classmethod
  def load_files(cls, result_file):
    """Loads state from disk."""
    assert os.path.isabs(result_file), result_file
    result = Result.load_file(result_file)
    saved_state = SavedState.load_file(result_to_state(result_file))
    return cls(result_file, result, saved_state)

  def load_isolate(self, isolate_file, variables):
    """Updates self.result and self.saved_state with information loaded from a
    .isolate file.

    Processes the loaded data, deduce root_dir, relative_cwd.
    """
    # Make sure to not depend on os.getcwd().
    assert os.path.isabs(isolate_file), isolate_file
    logging.info(
        'CompleteState.load_isolate(%s, %s)' % (isolate_file, variables))
    base_dir = os.path.dirname(isolate_file)

    # Processes the variables and update the saved state.
    self.saved_state.update(
        isolate_file, process_variables(variables, base_dir))
    known_variables = self.saved_state.variables

    with open(isolate_file, 'r') as isolate:
      # At that point, variables are not replaced yet in command and infiles.
      # infiles may contain directory entries and is in posix style.
      command, infiles, touched, read_only = load_isolate_for_flavor(
          isolate.read(), get_flavor())

    def substitute(entries):
      """Replaces the variables in each entry."""
      return [eval_variables(entry, known_variables) for entry in entries]

    command = substitute(command)
    infiles = substitute(infiles)
    touched = substitute(touched)

    # root_dir is automatically determined by the deepest root accessed with the
    # form '../../foo/bar'.
    root_dir = determine_root_dir(base_dir, infiles + touched)
    # The relative directory is automatically determined by the relative path
    # between root_dir and the directory containing the .isolate file.
    relative_cwd = os.path.relpath(base_dir, root_dir)

    def rebase(entries):
      """Makes each entry relative to root_dir."""
      return [
        relpath(normpath(os.path.join(base_dir, entry)), root_dir)
        for entry in entries
      ]

    # Normalize the files based to root_dir. It is important to keep the
    # trailing os.path.sep at that step.
    infiles = rebase(infiles)
    touched = rebase(touched)

    # Expand the directories by listing each file inside. Up to now, trailing
    # os.path.sep must be kept. Do not expand 'touched'.
    infiles = expand_directories_and_symlinks(
        root_dir,
        infiles,
        lambda x: re.match(r'.*\.(git|svn|pyc)$', x))

    # Finally, update the new stuff in the foo.result file, the file that is
    # used by run_swarm_step.py.
    self.result.update(command, infiles, touched, read_only, relative_cwd)
    logging.debug(self)

  def process_inputs(self, level):
    """Updates self.result.files with the files' mode and hash.

    See process_input() for more information.
    """
    files = self.result.files
    for infile in sorted(files):
      files[infile] = process_input(
          os.path.join(self.root_dir, infile),
          files[infile],
          level,
          self.result.read_only)

  def save_files(self):
    """Saves both self.result and self.saved_state."""
    logging.debug('Dumping to %s' % self.result_file)
    trace_inputs.write_json(self.result_file, self.result.flatten(), True)
    total_bytes = 0
    for metadata in self.result.files.values():
      total_bytes += metadata.get('size', 0)
    if total_bytes:
      logging.debug('Total size: %d bytes' % total_bytes)
    saved_state_file = result_to_state(self.result_file)
    logging.debug('Dumping to %s' % saved_state_file)
    trace_inputs.write_json(saved_state_file, self.saved_state.flatten(), True)

  @property
  def root_dir(self):
    """isolate_file is always inside relative_cwd relative to root_dir."""
    relative_cwd = self.result.relative_cwd
    isolate_dir = os.path.dirname(self.saved_state.isolate_file)
    # Special case '.'.
    if relative_cwd == '.':
      return isolate_dir
    assert isolate_dir.endswith(relative_cwd), (isolate_dir, relative_cwd)
    # Strip relative_cwd and the path separator in front of it.
    return isolate_dir[:-(len(relative_cwd) + 1)]

  @property
  def resultdir(self):
    """Directory containing the results, usually equivalent to the variable
    PRODUCT_DIR.
    """
    return os.path.dirname(self.result_file)

  def __str__(self):
    def indent(data, indent_length):
      """Indents text."""
      spacing = ' ' * indent_length
      return ''.join(spacing + line for line in str(data).splitlines(True))

    return ''.join((
        '%s(\n' % self.__class__.__name__,
        ' root_dir: %s\n' % self.root_dir,
        ' result: %s\n' % indent(self.result, 2),
        ' saved_state: %s)' % indent(self.saved_state, 2),
    ))

def load_complete_state(options, level):
  """Loads a CompleteState.

  This includes data from .isolate, .result and .state files.

  Arguments:
    options: Options instance generated with OptionParserIsolate.
    level: Amount of data to fetch.
  """
  if not options.result:
    # Constructs a dummy object that cannot be saved. Useful for temporary
    # commands like 'run'.
    complete_state = CompleteState(None, Result(), SavedState())
  else:
    # Load the previous state if it was present. Namely, "foo.result" and
    # "foo.state".
    complete_state = CompleteState.load_files(options.result)

  saved_isolate = complete_state.saved_state.isolate_file
  # Default to the .isolate file used the last time.
  options.isolate = options.isolate or saved_isolate
  if not options.isolate:
    raise ExecutionError('A .isolate file is required.')
  if saved_isolate and options.isolate != saved_isolate:
    raise ExecutionError(
        '%s and %s do not match.' % (options.isolate, saved_isolate))

  # Then load the .isolate and expands directories.
  complete_state.load_isolate(options.isolate, options.variables)

  # Regenerate complete_state.result.files.
  complete_state.process_inputs(level)
  return complete_state

def read_trace_as_isolate_dict(complete_state):
  """Reads a trace and returns the .isolate dictionary.

  The trace log is expected next to the result file, named
  <result_file>.log.

  Raises:
    ExecutionError: no result file is known, the log file doesn't exist or
                    the trace couldn't be read.
  """
  if not complete_state.result_file:
    # Commands accepting a missing --result create a CompleteState with a None
    # result_file; appending '.log' to it would raise an unhelpful TypeError.
    raise ExecutionError(
        'No .results file specified, can\'t locate the trace log.')
  api = trace_inputs.get_api()
  logfile = complete_state.result_file + '.log'
  if not os.path.isfile(logfile):
    raise ExecutionError(
        'No log file \'%s\' to read, did you forget to \'trace\'?' % logfile)
  try:
    results = trace_inputs.load_trace(
        logfile, complete_state.root_dir, api, default_blacklist)
    tracked, touched = split_touched(results.existent)
    # No untracked dependency is generated, hence the empty list.
    return generate_isolate(
        tracked,
        [],
        touched,
        complete_state.root_dir,
        complete_state.saved_state.variables,
        complete_state.result.relative_cwd)
  except trace_inputs.TracingFailure as e:
    raise ExecutionError(
        'Reading traces failed for: %s\n%s' %
          (' '.join(complete_state.result.command), str(e)))

def print_all(comment, data, stream):
  """Writes the top-level file comment, if any, then the pretty-printed
  .isolate |data| into |stream|.
  """
  # The comment is written as-is, before the data.
  if comment:
    stream.write(comment)
  pretty_print(data, stream)

def merge(complete_state):
  """Reads a trace and merges it back into the source .isolate file."""
  traced = read_trace_as_isolate_dict(complete_state)
  isolate_file = complete_state.saved_state.isolate_file

  # Now take that data and union it into the original .isolate file.
  with open(isolate_file, 'r') as f:
    prev_content = f.read()
  prev_config = load_isolate_as_config(
      eval_content(prev_content),
      extract_comment(prev_content),
      DEFAULT_OSES)
  new_config = load_isolate_as_config(traced, '', DEFAULT_OSES)
  config = union(prev_config, new_config)

  # Regenerate the smallest .isolate dictionary out of the merged data.
  # pylint: disable=E1103
  inverted, oses = invert_map(config.flatten())
  reduced, oses = reduce_inputs(inverted, oses)
  data = convert_map_to_isolate_dict(reduced, oses)

  print('Updating %s' % isolate_file)
  with open(isolate_file, 'wb') as f:
    print_all(config.file_comment, data, f)

def CMDcheck(args):
  """Checks that all the inputs are present and update .result."""
  options, _ = OptionParserIsolate(command='check').parse_args(args)
  # Loading the state is the check itself, nothing else is done specifically.
  # Just store the result and state.
  load_complete_state(options, NO_INFO).save_files()
  return 0

def CMDhashtable(args):
  """Creates a hash table content addressed object store.

  All the files listed in the .result file are put in the output directory with
  the file name being the sha-1 of the file's content.
  """
  parser = OptionParserIsolate(command='hashtable')
  options, _ = parser.parse_args(args)

  with run_swarm_step.Profiler('GenerateHashtable'):
    success = False
    try:
      complete_state = load_complete_state(options, WITH_HASH)
      options.outdir = (
          options.outdir or os.path.join(complete_state.resultdir, 'hashtable'))
      # Make sure that complete_state isn't modified until save_files() is
      # called, because any changes made to it here will propagate to the files
      # created (which is probably not intended).
      complete_state.save_files()

      logging.info('Creating content addressed object store with %d item',
                   len(complete_state.result.files))

      # The manifest itself is stored too, under the hash of its content.
      with open(complete_state.result_file, 'rb') as f:
        manifest_hash = hashlib.sha1(f.read()).hexdigest()
      manifest_metadata = {'sha-1': manifest_hash}

      infiles = complete_state.result.files
      infiles[complete_state.result_file] = manifest_metadata

      # An http(s) url means upload, anything else is a local directory.
      if re.match(r'^https?://.+$', options.outdir):
        upload_sha1_tree(
            base_url=options.outdir,
            indir=complete_state.root_dir,
            infiles=infiles)
      else:
        recreate_tree(
            outdir=options.outdir,
            indir=complete_state.root_dir,
            infiles=infiles,
            action=run_swarm_step.HARDLINK,
            as_sha1=True)
      success = True
    finally:
      # If the command failed, delete the .results file if it exists. This is
      # important so no stale swarm job is executed.
      if not success and os.path.isfile(options.result):
        os.remove(options.result)
  # Return an explicit exit code like the other commands, instead of None.
  return int(not success)

def CMDnoop(args):
  """Touches --result but does nothing else.

  This mode is to help transition since some builders do not have all the test
  data files checked out. Touch result_file and exit silently.
  """
  options, _ = OptionParserIsolate(command='noop').parse_args(args)
  # In particular, do not call load_complete_state(). Opening in append mode
  # creates the file if needed and leaves any existing content alone.
  with open(options.result, 'a'):
    pass
  return 0

def CMDmerge(args):
  """Reads and merges the data from the trace back into the original .isolate.

  Ignores --outdir.
  """
  # --result is optional for this command.
  parser = OptionParserIsolate(command='merge', require_result=False)
  options, _ = parser.parse_args(args)
  merge(load_complete_state(options, NO_INFO))
  return 0

def CMDread(args):
  """Reads the trace file generated with command 'trace'.

  Ignores --outdir.
  """
  # --result is optional for this command.
  parser = OptionParserIsolate(command='read', require_result=False)
  options, _ = parser.parse_args(args)
  state = load_complete_state(options, NO_INFO)
  # The generated .isolate content is only printed, nothing is saved.
  pretty_print(read_trace_as_isolate_dict(state), sys.stdout)
  return 0

def CMDremap(args):
  """Creates a directory with all the dependencies mapped into it.

  Useful to test manually why a test is failing. The target executable is not
  run.
  """
  parser = OptionParserIsolate(command='remap', require_result=False)
  options, _ = parser.parse_args(args)
  complete_state = load_complete_state(options, STATS_ONLY)

  if not options.outdir:
    options.outdir = run_swarm_step.make_temp_dir(
        'isolate', complete_state.root_dir)
  elif not os.path.isdir(options.outdir):
    os.makedirs(options.outdir)
  print('Remapping into %s' % options.outdir)
  # Refuse to mix the mapped files with unrelated content.
  if os.listdir(options.outdir):
    raise ExecutionError('Can\'t remap in a non-empty directory')

  recreate_tree(
      outdir=options.outdir,
      indir=complete_state.root_dir,
      infiles=complete_state.result.files,
      action=run_swarm_step.HARDLINK,
      as_sha1=False)
  if complete_state.result.read_only:
    run_swarm_step.make_writable(options.outdir, True)

  # The state can only be saved when --result was specified.
  if complete_state.result_file:
    complete_state.save_files()
  return 0

def CMDrun(args):
  """Runs the test executable in an isolated (temporary) directory.

  All the dependencies are mapped into the temporary directory and the
  directory is cleaned up after the target exits. Warning: if -outdir is
  specified, it is deleted upon exit.

  Argument processing stops at the first non-recognized argument and these
  arguments are appended to the command line of the target to run. For example,
  use: isolate.py -r foo.results -- --gtest_filter=Foo.Bar
  """
  parser = OptionParserIsolate(command='run', require_result=False)
  parser.enable_interspersed_args()
  options, args = parser.parse_args(args)
  complete_state = load_complete_state(options, STATS_ONLY)

  # The extra arguments are appended to the command of the .isolate file.
  cmd = complete_state.result.command + args
  if not cmd:
    raise ExecutionError('No command to run')
  cmd = trace_inputs.fix_python_path(cmd)

  try:
    if not options.outdir:
      options.outdir = run_swarm_step.make_temp_dir(
          'isolate', complete_state.root_dir)
    elif not os.path.isdir(options.outdir):
      os.makedirs(options.outdir)
    recreate_tree(
        outdir=options.outdir,
        indir=complete_state.root_dir,
        infiles=complete_state.result.files,
        action=run_swarm_step.HARDLINK,
        as_sha1=False)
    cwd = os.path.normpath(
        os.path.join(options.outdir, complete_state.result.relative_cwd))
    if not os.path.isdir(cwd):
      # It can happen when no files are mapped from the directory containing the
      # .isolate file. But the directory must exist to be the current working
      # directory.
      os.makedirs(cwd)
    if complete_state.result.read_only:
      run_swarm_step.make_writable(options.outdir, True)
    logging.info('Running %s, cwd=%s' % (cmd, cwd))
    result = subprocess.call(cmd, cwd=cwd)
  finally:
    # The directory is deleted even when it was specified with --outdir.
    if options.outdir:
      run_swarm_step.rmtree(options.outdir)

  # The state can only be saved when --result was specified.
  if complete_state.result_file:
    complete_state.save_files()
  return result

def CMDtrace(args):
  """Traces the target using trace_inputs.py.

  It runs the executable without remapping it, and traces all the files it and
  its child processes access. Then the 'read' command can be used to generate an
  updated .isolate file out of it.

  Argument processing stops at the first non-recognized argument and these
  arguments are appended to the command line of the target to run. For example,
  use: isolate.py -r foo.results -- --gtest_filter=Foo.Bar
  """
  parser = OptionParserIsolate(command='trace')
  parser.enable_interspersed_args()
  parser.add_option(
      '-m', '--merge', action='store_true',
      help='After tracing, merge the results back in the .isolate file')
  options, args = parser.parse_args(args)
  complete_state = load_complete_state(options, STATS_ONLY)

  # The extra arguments are appended to the command of the .isolate file.
  cmd = complete_state.result.command + args
  if not cmd:
    raise ExecutionError('No command to run')
  cmd = trace_inputs.fix_python_path(cmd)
  # The target is run in place, from inside the checkout.
  cwd = os.path.normpath(
      os.path.join(complete_state.root_dir, complete_state.result.relative_cwd))
  logging.info('Running %s, cwd=%s' % (cmd, cwd))

  api = trace_inputs.get_api()
  logfile = complete_state.result_file + '.log'
  api.clean_trace(logfile)
  try:
    with api.get_tracer(logfile) as tracer:
      result, _ = tracer.trace(cmd, cwd, 'default', True)
  except trace_inputs.TracingFailure as e:
    raise ExecutionError('Tracing failed for: %s\n%s' % (' '.join(cmd), str(e)))

  complete_state.save_files()
  if options.merge:
    merge(complete_state)
  return result

class OptionParserIsolate(trace_inputs.OptionParserWithNiceDescription):
  """Adds automatic --isolate, --result, --out and --variables handling."""
  def __init__(self, require_result=True, **kwargs):
    trace_inputs.OptionParserWithNiceDescription.__init__(self, **kwargs)
    # The variables that are always defined, unless overridden with -V.
    executable_suffix = '.exe' if sys.platform in ('win32', 'cygwin') else ''
    default_variables = [
      ('OS', get_flavor()),
      ('EXECUTABLE_SUFFIX', executable_suffix),
    ]

    group = optparse.OptionGroup(self, "Common options")
    group.add_option(
        '-r', '--result',
        metavar='FILE',
        help='.result file to store the json manifest')
    group.add_option(
        '-i', '--isolate',
        metavar='FILE',
        help='.isolate file to load the dependency data from')
    group.add_option(
        '-V', '--variable',
        nargs=2,
        action='append',
        default=default_variables,
        dest='variables',
        metavar='FOO BAR',
        help='Variables to process in the .isolate file, default: %default. '
             'Variables are persistent accross calls, they are saved inside '
             '<results>.state')
    group.add_option(
        '-o', '--outdir', metavar='DIR',
        help='Directory used to recreate the tree or store the hash table. '
             'If the environment variable ISOLATE_HASH_TABLE_DIR exists, it '
             'will be used. Otherwise, for run and remap, uses a /tmp '
             'subdirectory. For the other modes, defaults to the directory '
             'containing --result')
    self.add_option_group(group)
    self.require_result = require_result

  def parse_args(self, *args, **kwargs):
    """Makes sure the paths make sense.

    On Windows, / and \\ are often mixed together in a path.
    """
    options, args = trace_inputs.OptionParserWithNiceDescription.parse_args(
        self, *args, **kwargs)
    if not self.allow_interspersed_args and args:
      self.error('Unsupported argument: %s' % args)

    def native_abspath(path):
      """Returns |path| absolute, with only native path separators."""
      return os.path.abspath(path.replace('/', os.path.sep))

    # The (name, value) pairs are converted to a dict, the last one wins.
    options.variables = dict(options.variables)

    if options.result:
      if not options.result.endswith('.results'):
        self.error('--result value must end with \'.results\'')
      options.result = native_abspath(options.result)
    elif self.require_result:
      self.error('--result is required.')

    if options.isolate:
      options.isolate = trace_inputs.get_native_path_case(
          native_abspath(options.isolate))

    # An http(s) url is kept as-is, it is not a path.
    if options.outdir and not re.match(r'^https?://.+$', options.outdir):
      options.outdir = native_abspath(options.outdir)
    return options, args

### Glue code to make all the commands works magically.


# The 'help' command is the one implemented by trace_inputs.
CMDhelp = trace_inputs.CMDhelp

def main(argv):
  """Runs the command specified in |argv| and returns the exit code.

  The expected errors are printed on stderr without a stack trace and 1 is
  returned.
  """
  handled_errors = (
    ExecutionError,
    run_swarm_step.MappingError,
    run_swarm_step.ConfigError,
  )
  try:
    return trace_inputs.main_impl(argv)
  except handled_errors as e:
    sys.stderr.write('\nError: ' + str(e) + '\n')
    return 1

# Only run the tool when executed as a script, not when imported.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))

« no previous file with comments | « fix_test_cases.py ('k') | isolate_merge.py » ('j') | no next file with comments »