appengine/swarming/swarming_bot/bot_code/task_runner.py - Issue 2443663002: Pass args in file from task_runner to run_isolated

Unified Diff: appengine/swarming/swarming_bot/bot_code/task_runner.py

Issue 2443663002: Pass args in file from task_runner to run_isolated (Closed)

Patch Set: Respond to code review comments other than changes to options processing Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/swarming/swarming_bot/bot_code/task_runner.py

diff --git a/appengine/swarming/swarming_bot/bot_code/task_runner.py b/appengine/swarming/swarming_bot/bot_code/task_runner.py

index 38bc1fa9d1a6f11348825a59962a24b47e9f5140..b8eec0acfed44bc35e7648ce6f10b81eecd03781 100644

--- a/appengine/swarming/swarming_bot/bot_code/task_runner.py

+++ b/appengine/swarming/swarming_bot/bot_code/task_runner.py

@@ -83,7 +83,7 @@ def get_run_isolated():

return [sys.executable, THIS_FILE, 'run_isolated']

-def get_isolated_cmd(

+def get_isolated_args(

work_dir, task_details, isolated_result, bot_file, min_free_space):

"""Returns the command to call run_isolated. Mocked in tests."""

assert (bool(task_details.command) !=

@@ -91,7 +91,7 @@ def get_isolated_cmd(

bot_dir = os.path.dirname(work_dir)

if os.path.isfile(isolated_result):

os.remove(isolated_result)

- cmd = get_run_isolated()

+ cmd = []

if task_details.isolated:

cmd.extend(

@@ -343,6 +343,24 @@ def kill_and_wait(proc, grace_period, reason):

return exit_code

+def fail_without_command(remote, bot_id, task_id, params, cost_usd_hour,

+ task_start, exit_code, stdout):

+ now = monotonic_time()

+ params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.

+ params['duration'] = now - task_start

+ params['io_timeout'] = False

+ params['hard_timeout'] = False

+ # Ignore server reply to stop.

+ remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)

+ return {

+ u'exit_code': exit_code,

+ u'hard_timeout': False,

+ u'io_timeout': False,

+ u'must_signal_internal_failure': None,

+ u'version': OUT_VERSION,

+ }

def run_command(remote, task_details, work_dir, cost_usd_hour,

task_start, min_free_space, bot_file, extra_env):

"""Runs a command and sends packets to the server to stream results back.

@@ -377,7 +395,10 @@ def run_command(remote, task_details, work_dir, cost_usd_hour,

}

isolated_result = os.path.join(work_dir, 'isolated_result.json')

- cmd = get_isolated_cmd(

+ args_path = os.path.join(work_dir, 'run_isolated_args.json')

+ cmd = get_run_isolated()

+ cmd.extend(['-f', args_path])

+ args = get_isolated_args(

work_dir, task_details, isolated_result, bot_file, min_free_space)

# Hard timeout enforcement is deferred to run_isolated. Grace is doubled to

# give one 'grace_period' slot to the child process and one slot to upload

@@ -399,6 +420,22 @@ def run_command(remote, task_details, work_dir, cost_usd_hour,

logging.info('cmd=%s', cmd)

logging.info('cwd=%s', work_dir)

logging.info('env=%s', env)

+ fail_on_start = lambda exit_code, stdout: fail_without_command(

+ remote, bot_id, task_id, params, cost_usd_hour, task_start,

+ exit_code, stdout)

+ # We write args to a file since there may be more of them than the OS

+ # can handle.

+ try:

+ args_file = open(args_path, "w")

M-A Ruel 2016/10/24 16:26:52 with open(...

aludwin 2016/10/24 17:05:48 Do I put that inside a try-catch?

M-A Ruel 2016/10/24 17:11:53 yes, so it looks like: try: with open(args_path

+ json.dump(args, args_file)

+ args_file.close()

+ except (OSError, IOError) as e:

+ return fail_on_start(

+ -1,

+ 'Could not write args to %s: %s' % (args_path, e))

+ # Start the command

try:

assert cmd and all(isinstance(a, basestring) for a in cmd)

proc = subprocess42.Popen(

@@ -410,22 +447,11 @@ def run_command(remote, task_details, work_dir, cost_usd_hour,

stderr=subprocess42.STDOUT,

stdin=subprocess42.PIPE)

except OSError as e:

- stdout = 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e)

- now = monotonic_time()

- params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.

- params['duration'] = now - start

- params['io_timeout'] = False

- params['hard_timeout'] = False

- # Ignore server reply to stop.

- remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)

- return {

- u'exit_code': 1,

- u'hard_timeout': False,

- u'io_timeout': False,

- u'must_signal_internal_failure': None,

- u'version': OUT_VERSION,

- }

+ return fail_on_start(

+ 1,

+ 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e))

+ # Monitor the task

output_chunk_start = 0

stdout = ''

exit_code = None

« no previous file with comments | « appengine/swarming/swarming_bot/__main__.py ('k') | appengine/swarming/swarming_bot/bot_code/task_runner_test.py » ('j') | client/run_isolated.py » ('J')