appengine/swarming/swarming_bot/bot_code/task_runner.py - Issue 2443663002: Pass args in file from task_runner to run_isolated

Side by Side Diff: appengine/swarming/swarming_bot/bot_code/task_runner.py

Issue 2443663002: Pass args in file from task_runner to run_isolated (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « appengine/swarming/swarming_bot/__main__.py ('k') | appengine/swarming/swarming_bot/bot_code/task_runner_test.py » ('j') | appengine/swarming/swarming_bot/bot_code/task_runner_test.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2013 The LUCI Authors. All rights reserved.	1 # Copyright 2013 The LUCI Authors. All rights reserved.

2 # Use of this source code is governed under the Apache License, Version 2.0	2 # Use of this source code is governed under the Apache License, Version 2.0

3 # that can be found in the LICENSE file.	3 # that can be found in the LICENSE file.

4	4

5 """Runs a Swarming task.	5 """Runs a Swarming task.

6	6

7 Downloads all the necessary files to run the task, executes the command and	7 Downloads all the necessary files to run the task, executes the command and

8 streams results back to the Swarming server.	8 streams results back to the Swarming server.

9	9

10 The process exit code is 0 when the task was executed, even if the task itself	10 The process exit code is 0 when the task was executed, even if the task itself

(...skipping 65 matching lines...)
76	76

77	77

78 def get_run_isolated():	78 def get_run_isolated():

79 """Returns the path to itself to run run_isolated.	79 """Returns the path to itself to run run_isolated.

80	80

81 Mocked in test to point to the real run_isolated.py script.	81 Mocked in test to point to the real run_isolated.py script.

82 """	82 """

83 return [sys.executable, THIS_FILE, 'run_isolated']	83 return [sys.executable, THIS_FILE, 'run_isolated']

84	84

85	85

86 def get_isolated_cmd(	86 def get_isolated_args(

87 work_dir, task_details, isolated_result, bot_file, min_free_space):	87 work_dir, task_details, isolated_result, bot_file, min_free_space):

88 """Returns the command to call run_isolated. Mocked in tests."""	88 """Returns the command to call run_isolated. Mocked in tests."""

89 assert (bool(task_details.command) !=	89 assert (bool(task_details.command) !=

90 bool(task_details.isolated and task_details.isolated.get('input')))	90 bool(task_details.isolated and task_details.isolated.get('input')))

91 bot_dir = os.path.dirname(work_dir)	91 bot_dir = os.path.dirname(work_dir)

92 if os.path.isfile(isolated_result):	92 if os.path.isfile(isolated_result):

93 os.remove(isolated_result)	93 os.remove(isolated_result)

94 cmd = get_run_isolated()	94 cmd = []

95	95

96 if task_details.isolated:	96 if task_details.isolated:

97 cmd.extend(	97 cmd.extend(

98 [	98 [

99 '-I', task_details.isolated['server'].encode('utf-8'),	99 '-I', task_details.isolated['server'].encode('utf-8'),

100 '--namespace', task_details.isolated['namespace'].encode('utf-8'),	100 '--namespace', task_details.isolated['namespace'].encode('utf-8'),

101 ])	101 ])

102 isolated_input = task_details.isolated.get('input')	102 isolated_input = task_details.isolated.get('input')

103 if isolated_input:	103 if isolated_input:

104 cmd.extend(	104 cmd.extend(

(...skipping 231 matching lines...)
336 try:	336 try:

337 proc.wait(grace_period)	337 proc.wait(grace_period)

338 except subprocess42.TimeoutError:	338 except subprocess42.TimeoutError:

339 logging.warning('SIGKILL finally due to %s', reason)	339 logging.warning('SIGKILL finally due to %s', reason)

340 proc.kill()	340 proc.kill()

341 exit_code = proc.wait()	341 exit_code = proc.wait()

342 logging.info('Waiting for process exit in finally - done')	342 logging.info('Waiting for process exit in finally - done')

343 return exit_code	343 return exit_code

344	344

345	345

	346 def fail_without_command(remote, bot_id, task_id, params, cost_usd_hour,

	347 task_start, exit_code, stdout):

	348 now = monotonic_time()

	349 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.

	350 params['duration'] = now - task_start

	351 params['io_timeout'] = False

	352 params['hard_timeout'] = False

	353 # Ignore server reply to stop.

	354 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)

	355 return {

	356 u'exit_code': exit_code,

	357 u'hard_timeout': False,

	358 u'io_timeout': False,

	359 u'must_signal_internal_failure': None,

	360 u'version': OUT_VERSION,

	361 }

	362

	363

346 def run_command(remote, task_details, work_dir, cost_usd_hour,	364 def run_command(remote, task_details, work_dir, cost_usd_hour,

347 task_start, min_free_space, bot_file, extra_env):	365 task_start, min_free_space, bot_file, extra_env):

348 """Runs a command and sends packets to the server to stream results back.	366 """Runs a command and sends packets to the server to stream results back.

349	367

350 Implements both I/O and hard timeouts. Sends the packets numbered, so the	368 Implements both I/O and hard timeouts. Sends the packets numbered, so the

351 server can ensure they are processed in order.	369 server can ensure they are processed in order.

352	370

353 Returns:	371 Returns:

354 Metadata dict with the execution result.	372 Metadata dict with the execution result.

355	373

(...skipping 14 matching lines...)
370 # Don't even bother, the task was already canceled.	388 # Don't even bother, the task was already canceled.

371 return {	389 return {

372 u'exit_code': -1,	390 u'exit_code': -1,

373 u'hard_timeout': False,	391 u'hard_timeout': False,

374 u'io_timeout': False,	392 u'io_timeout': False,

375 u'must_signal_internal_failure': None,	393 u'must_signal_internal_failure': None,

376 u'version': OUT_VERSION,	394 u'version': OUT_VERSION,

377 }	395 }

378	396

379 isolated_result = os.path.join(work_dir, 'isolated_result.json')	397 isolated_result = os.path.join(work_dir, 'isolated_result.json')

380 cmd = get_isolated_cmd(	398 args_path = os.path.join(work_dir, 'run_isolated_args.json')

	399 cmd = get_run_isolated()

	400 cmd.extend(['-f', args_path])

	401 args = get_isolated_args(

381 work_dir, task_details, isolated_result, bot_file, min_free_space)	402 work_dir, task_details, isolated_result, bot_file, min_free_space)

382 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to	403 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to

383 # give one 'grace_period' slot to the child process and one slot to upload	404 # give one 'grace_period' slot to the child process and one slot to upload

384 # the results back.	405 # the results back.

385 task_details.hard_timeout = 0	406 task_details.hard_timeout = 0

386 if task_details.grace_period:	407 if task_details.grace_period:

387 task_details.grace_period *= 2	408 task_details.grace_period *= 2

388	409

389 try:	410 try:

390 # TODO(maruel): Support both channels independently and display stderr in	411 # TODO(maruel): Support both channels independently and display stderr in

391 # red.	412 # red.

392 env = os.environ.copy()	413 env = os.environ.copy()

393 for env_to_add in (task_details.env, extra_env):	414 for env_to_add in (task_details.env, extra_env):

394 for key, value in (env_to_add or {}).iteritems():	415 for key, value in (env_to_add or {}).iteritems():

395 if not value:	416 if not value:

396 env.pop(key, None)	417 env.pop(key, None)

397 else:	418 else:

398 env[key] = value	419 env[key] = value

399 logging.info('cmd=%s', cmd)	420 logging.info('cmd=%s', cmd)

400 logging.info('cwd=%s', work_dir)	421 logging.info('cwd=%s', work_dir)

401 logging.info('env=%s', env)	422 logging.info('env=%s', env)

	423 fail_on_start = lambda exit_code, stdout: fail_without_command(remote,
	M-A Ruel 2016/10/21 20:15:19: Wrap `remote` onto the next line so that all the arguments are aligned. aludwin 2016/10/21 20:31:59: Done.
	424 bot_id, task_id, params, cost_usd_hour, task_start, exit_code, stdout)

	425

	426 # We write args to a file since there may be more of them than the OS

	427 # can handle.

	428 try:

	429 args_file = open(args_path, "w")

	430 json.dump(args, args_file)

	431 args_file.close()

	432 except (OSError, IOError) as e:

	433 return fail_on_start(-1,

	434 "Could not write args to %s: %s" % (args_path, e))
	M-A Ruel 2016/10/21 20:15:19: Use single quotes, and align all the args the same way. aludwin 2016/10/21 20:32:00: Done.
	435

	436 # Start the command

402 try:	437 try:

403 assert cmd and all(isinstance(a, basestring) for a in cmd)	438 assert cmd and all(isinstance(a, basestring) for a in cmd)

404 proc = subprocess42.Popen(	439 proc = subprocess42.Popen(

405 cmd,	440 cmd,

406 env=env,	441 env=env,

407 cwd=work_dir,	442 cwd=work_dir,

408 detached=True,	443 detached=True,

409 stdout=subprocess42.PIPE,	444 stdout=subprocess42.PIPE,

410 stderr=subprocess42.STDOUT,	445 stderr=subprocess42.STDOUT,

411 stdin=subprocess42.PIPE)	446 stdin=subprocess42.PIPE)

412 except OSError as e:	447 except OSError as e:

413 stdout = 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e)	448 return fail_on_start(1,

414 now = monotonic_time()	449 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e))

415 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.

416 params['duration'] = now - start

417 params['io_timeout'] = False

418 params['hard_timeout'] = False

419 # Ignore server reply to stop.

420 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)

421 return {

422 u'exit_code': 1,

423 u'hard_timeout': False,

424 u'io_timeout': False,

425 u'must_signal_internal_failure': None,

426 u'version': OUT_VERSION,

427 }

428	450

	451 # Monitor the task

429 output_chunk_start = 0	452 output_chunk_start = 0

430 stdout = ''	453 stdout = ''

431 exit_code = None	454 exit_code = None

432 had_io_timeout = False	455 had_io_timeout = False

433 must_signal_internal_failure = None	456 must_signal_internal_failure = None

434 kill_sent = False	457 kill_sent = False

435 timed_out = None	458 timed_out = None

436 try:	459 try:

437 calc = lambda: calc_yield_wait(	460 calc = lambda: calc_yield_wait(

438 task_details, start, last_io, timed_out, stdout)	461 task_details, start, last_io, timed_out, stdout)

(...skipping 186 matching lines...)
625 options.start = now	648 options.start = now

626	649

627 try:	650 try:

628 load_and_run(	651 load_and_run(

629 options.in_file, options.swarming_server, options.cost_usd_hour,	652 options.in_file, options.swarming_server, options.cost_usd_hour,

630 options.start, options.out_file, options.min_free_space,	653 options.start, options.out_file, options.min_free_space,

631 options.bot_file, options.auth_params_file)	654 options.bot_file, options.auth_params_file)

632 return 0	655 return 0

633 finally:	656 finally:

634 logging.info('quitting')	657 logging.info('quitting')

OLD	NEW