| Index: client/run_isolated.py
|
| diff --git a/client/run_isolated.py b/client/run_isolated.py
|
| index 75e8b325a622ef215d9ecc648cbbc918e0732de9..07cd42141bb144dc5cd4f771da4485a2db3aae12 100755
|
| --- a/client/run_isolated.py
|
| +++ b/client/run_isolated.py
|
| @@ -14,7 +14,7 @@ file. All content written to this directory will be uploaded upon termination
|
| and the .isolated file describing this directory will be printed to stdout.
|
| """
|
|
|
| -__version__ = '0.4.4'
|
| +__version__ = '0.5'
|
|
|
| import logging
|
| import optparse
|
| @@ -122,12 +122,137 @@ def change_tree_read_only(rootdir, read_only):
|
|
|
| def process_command(command, out_dir):
|
| """Replaces isolated specific variables in a command line."""
|
| - filtered = []
|
| - for arg in command:
|
| + def fix(arg):
|
| if '${ISOLATED_OUTDIR}' in arg:
|
| - arg = arg.replace('${ISOLATED_OUTDIR}', out_dir).replace('/', os.sep)
|
| - filtered.append(arg)
|
| - return filtered
|
| + return arg.replace('${ISOLATED_OUTDIR}', out_dir).replace('/', os.sep)
|
| + return arg
|
| +
|
| + return [fix(arg) for arg in command]
|
| +
|
| +
|
| +def run_command(command, cwd):
|
| + """Runs the command, returns the process exit code."""
|
| + logging.info('run_command(%s, %s)' % (command, cwd))
|
| + sys.stdout.flush()
|
| + with tools.Profiler('RunTest'):
|
| + try:
|
| + with subprocess42.Popen_with_handler(command, cwd=cwd) as p:
|
| + p.communicate()
|
| + exit_code = p.returncode
|
| + except OSError:
|
| + # This is not considered to be an internal error. The executable simply
|
| + # does not exist.
|
| + exit_code = 1
|
| + logging.info(
|
| + 'Command finished with exit code %d (%s)',
|
| + exit_code, hex(0xffffffff & exit_code))
|
| + return exit_code
|
| +
|
| +
|
| +def delete_and_upload(storage, out_dir, leak_temp_dir):
|
| + """Deletes the temporary run directory and uploads results back.
|
| +
|
| + Returns:
|
| + tuple(outputs_ref, success)
|
| + - outputs_ref is a dict referring to the results archived back to the
|
| + isolated server, if applicable.
|
| + - success is False if something occurred that means that the task must
|
| + forcibly be considered a failure, e.g. zombie processes were left behind.
|
| + """
|
| +
|
| + # Upload out_dir and generate a .isolated file out of this directory. It is
|
| + # only done if files were written in the directory.
|
| + outputs_ref = None
|
| + if os.path.isdir(out_dir) and os.listdir(out_dir):
|
| + with tools.Profiler('ArchiveOutput'):
|
| + try:
|
| + results = isolateserver.archive_files_to_storage(
|
| + storage, [out_dir], None)
|
| + outputs_ref = {
|
| + 'isolated': results[0][0],
|
| + 'isolatedserver': storage.location,
|
| + 'namespace': storage.namespace,
|
| + }
|
| + except isolateserver.Aborted:
|
| + # This happens when a signal SIGTERM was received while uploading data.
|
| + # There are 2 causes:
|
| + # - The task was too slow and was about to be killed anyway due to
|
| + # exceeding the hard timeout.
|
| + # - The amount of data uploaded back is very large and took too much
|
| + # time to archive.
|
| + sys.stderr.write('Received SIGTERM while uploading')
|
| + # Re-raise, so it will be treated as an internal failure.
|
| + raise
|
| + try:
|
| + if not leak_temp_dir and not file_path.rmtree(out_dir):
|
| + logging.error('Had difficulties removing out_dir %s', out_dir)
|
| + return outputs_ref, False
|
| + except OSError as e:
|
| + # When this happens, it means there's a process error.
|
| + logging.error('Had difficulties removing out_dir %s: %s', out_dir, e)
|
| + return outputs_ref, False
|
| + return outputs_ref, True
|
| +
|
| +
|
| +def map_and_run(isolated_hash, storage, cache, leak_temp_dir, extra_args):
|
| + """Maps and runs the command. Returns metadata about the result."""
|
| + # TODO(maruel): Include performance statistics.
|
| + result = {
|
| + 'exit_code': None,
|
| + 'internal_failure': None,
|
| + 'outputs_ref': None,
|
| + 'version': 1,
|
| + }
|
| + tmp_root = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
|
| + run_dir = make_temp_dir(u'run_tha_test', tmp_root)
|
| + out_dir = unicode(make_temp_dir(u'isolated_out', tmp_root))
|
| + try:
|
| + bundle = isolateserver.fetch_isolated(
|
| + isolated_hash=isolated_hash,
|
| + storage=storage,
|
| + cache=cache,
|
| + outdir=run_dir,
|
| + require_command=True)
|
| +
|
| + change_tree_read_only(run_dir, bundle.read_only)
|
| + cwd = os.path.normpath(os.path.join(run_dir, bundle.relative_cwd))
|
| + command = bundle.command + extra_args
|
| + file_path.ensure_command_has_abs_path(command, cwd)
|
| + result['exit_code'] = run_command(process_command(command, out_dir), cwd)
|
| + except Exception as e:
|
| + # An internal error occurred. Report accordingly so the swarming task will
|
| + # be retried automatically.
|
| + logging.error('internal failure: %s', e)
|
| + result['internal_failure'] = str(e)
|
| + on_error.report(None)
|
| + finally:
|
| + try:
|
| + if leak_temp_dir:
|
| + logging.warning(
|
| + 'Deliberately leaking %s for later examination', run_dir)
|
| + elif not file_path.rmtree(run_dir):
|
| + # On Windows rmtree(run_dir) call above has a synchronization effect: it
|
| + # finishes only when all task child processes terminate (since a running
|
| + # process locks *.exe file). Examine out_dir only after that call
|
| + # completes (since child processes may write to out_dir too and we need
|
| + # to wait for them to finish).
|
| + print >> sys.stderr, (
|
| + 'Failed to delete the temporary directory, forcibly failing\n'
|
| + 'the task because of it. No zombie process can outlive a\n'
|
| + 'successful task run and still be marked as successful.\n'
|
| + 'Fix your stuff.')
|
| + if result['exit_code'] == 0:
|
| + result['exit_code'] = 1
|
| +
|
| + result['outputs_ref'], success = delete_and_upload(
|
| + storage, out_dir, leak_temp_dir)
|
| + if not success and result['exit_code'] == 0:
|
| + result['exit_code'] = 1
|
| + except Exception as e:
|
| + # Swallow any exception in the main finally clause.
|
| + logging.error('Leaking out_dir %s: %s', out_dir, e)
|
| + result['internal_failure'] = str(e)
|
| + return result
|
|
|
|
|
| def run_tha_test(
|
| @@ -150,141 +275,35 @@ def run_tha_test(
|
| in-memory.
|
| leak_temp_dir: if true, the temporary directory will be deliberately leaked
|
| for later examination.
|
| - result_json: file path to dump result metadata into.
|
| + result_json: file path to dump result metadata into. If set, the process
|
| + exit code is always 0 unless an internal error occurred.
|
| extra_args: optional arguments to add to the command stated in the .isolate
|
| file.
|
| - """
|
| - tmp_root = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
|
| - run_dir = make_temp_dir(u'run_tha_test', tmp_root)
|
| - out_dir = unicode(make_temp_dir(u'isolated_out', tmp_root))
|
| - result = 0
|
| - try:
|
| - try:
|
| - bundle = isolateserver.fetch_isolated(
|
| - isolated_hash=isolated_hash,
|
| - storage=storage,
|
| - cache=cache,
|
| - outdir=run_dir,
|
| - require_command=True)
|
| - except isolated_format.IsolatedError:
|
| - on_error.report(None)
|
| - return 1
|
| -
|
| - change_tree_read_only(run_dir, bundle.read_only)
|
| - cwd = os.path.normpath(os.path.join(run_dir, bundle.relative_cwd))
|
| - command = bundle.command + extra_args
|
|
|
| - file_path.ensure_command_has_abs_path(command, cwd)
|
| - command = process_command(command, out_dir)
|
| - logging.info('Running %s, cwd=%s' % (command, cwd))
|
| -
|
| - # TODO(csharp): This should be specified somewhere else.
|
| - # TODO(vadimsh): Pass it via 'env_vars' in manifest.
|
| - # Add a rotating log file if one doesn't already exist.
|
| - env = os.environ.copy()
|
| - if MAIN_DIR:
|
| - env.setdefault('RUN_TEST_CASES_LOG_FILE',
|
| - os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
|
| + Returns:
|
| + Process exit code that should be used.
|
| + """
|
| + # run_isolated exit code. Depends on if result_json is used or not.
|
| + result = map_and_run(
|
| + isolated_hash, storage, cache, leak_temp_dir, extra_args)
|
| + logging.info('Result:\n%s', tools.format_json(result, dense=True))
|
| + if result_json:
|
| + tools.write_json(result_json, result, dense=True)
|
| + # Only return 1 if there was an internal error.
|
| + return int(bool(result['internal_failure']))
|
| +
|
| + # Marshall into old-style inline output.
|
| + if result['outputs_ref']:
|
| + data = {
|
| + 'hash': result['outputs_ref']['isolated'],
|
| + 'namespace': result['outputs_ref']['namespace'],
|
| + 'storage': result['outputs_ref']['isolatedserver'],
|
| + }
|
| sys.stdout.flush()
|
| - with tools.Profiler('RunTest'):
|
| - try:
|
| - with subprocess42.Popen_with_handler(command, cwd=cwd, env=env) as p:
|
| - p.communicate()
|
| - result = p.returncode
|
| - except OSError:
|
| - on_error.report('Failed to run %s; cwd=%s' % (command, cwd))
|
| - result = 1
|
| - logging.info(
|
| - 'Command finished with exit code %d (%s)',
|
| - result, hex(0xffffffff & result))
|
| - finally:
|
| - try:
|
| - if leak_temp_dir:
|
| - logging.warning('Deliberately leaking %s for later examination',
|
| - run_dir)
|
| - else:
|
| - try:
|
| - if not file_path.rmtree(run_dir):
|
| - print >> sys.stderr, (
|
| - 'Failed to delete the temporary directory, forcibly failing\n'
|
| - 'the task because of it. No zombie process can outlive a\n'
|
| - 'successful task run and still be marked as successful.\n'
|
| - 'Fix your stuff.')
|
| - result = result or 1
|
| - except OSError as exc:
|
| - logging.error('Leaking run_dir %s: %s', run_dir, exc)
|
| - result = 1
|
| -
|
| - # HACK(vadimsh): On Windows rmtree(run_dir) call above has
|
| - # a synchronization effect: it finishes only when all task child processes
|
| - # terminate (since a running process locks *.exe file). Examine out_dir
|
| - # only after that call completes (since child processes may
|
| - # write to out_dir too and we need to wait for them to finish).
|
| -
|
| - # Upload out_dir and generate a .isolated file out of this directory.
|
| - # It is only done if files were written in the directory.
|
| - if os.path.isdir(out_dir) and os.listdir(out_dir):
|
| - with tools.Profiler('ArchiveOutput'):
|
| - try:
|
| - results = isolateserver.archive_files_to_storage(
|
| - storage, [out_dir], None)
|
| - except isolateserver.Aborted:
|
| - # This happens when a signal SIGTERM was received while uploading
|
| - # data. There is 2 causes:
|
| - # - The task was too slow and was about to be killed anyway due to
|
| - # exceeding the hard timeout.
|
| - # - The amount of data uploaded back is very large and took too much
|
| - # time to archive.
|
| - #
|
| - # There's 3 options to handle this:
|
| - # - Ignore the upload failure as a silent failure. This can be
|
| - # detected client side by the fact no result file exists.
|
| - # - Return as if the task failed. This is not factually correct.
|
| - # - Return an internal failure. Sadly, it's impossible at this level
|
| - # at the moment.
|
| - #
|
| - # For now, silently drop the upload.
|
| - #
|
| - # In any case, the process only has a very short grace period so it
|
| - # needs to exit right away.
|
| - sys.stderr.write('Received SIGTERM while uploading')
|
| - results = None
|
| -
|
| - if results:
|
| - if result_json:
|
| - data = {
|
| - 'isolated': results[0][0],
|
| - 'isolatedserver': storage.location,
|
| - 'namespace': storage.namespace,
|
| - }
|
| - tools.write_json(result_json, data, dense=True)
|
| - else:
|
| - data = {
|
| - 'hash': results[0][0],
|
| - 'namespace': storage.namespace,
|
| - 'storage': storage.location,
|
| - }
|
| - sys.stdout.flush()
|
| - print(
|
| - '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
|
| - tools.format_json(data, dense=True))
|
| - logging.info('%s', data)
|
| -
|
| - finally:
|
| - try:
|
| - if os.path.isdir(out_dir) and not file_path.rmtree(out_dir):
|
| - logging.error('Had difficulties removing out_dir %s', out_dir)
|
| - result = result or 1
|
| - except OSError as exc:
|
| - # Only report on non-Windows or on Windows when the process had
|
| - # succeeded. Due to the way file sharing works on Windows, it's sadly
|
| - # expected that file deletion may fail when a test failed.
|
| - logging.error('Failed to remove out_dir %s: %s', out_dir, exc)
|
| - if sys.platform != 'win32' or not result:
|
| - on_error.report(None)
|
| - result = 1
|
| -
|
| - return result
|
| + print(
|
| + '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
|
| + tools.format_json(data, dense=True))
|
| + return result['exit_code'] or int(bool(result['internal_failure']))
|
|
|
|
|
| def main(args):
|
| @@ -293,8 +312,10 @@ def main(args):
|
| usage='%prog <options>',
|
| version=__version__,
|
| log_file=RUN_ISOLATED_LOG_FILE)
|
| -
|
| - parser.add_option('--json', help='dump output metadata to json file')
|
| + parser.add_option(
|
| + '--json',
|
| + help='dump output metadata to json file. When used, run_isolated returns '
|
| + 'non-zero only on internal failure')
|
| data_group = optparse.OptionGroup(parser, 'Data source')
|
| data_group.add_option(
|
| '-s', '--isolated',
|
|
|