Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(454)

Side by Side Diff: appengine/swarming/swarming_bot/bot_code/task_runner.py

Issue 2443663002: Pass args in file from task_runner to run_isolated (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2013 The LUCI Authors. All rights reserved. 1 # Copyright 2013 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Runs a Swarming task. 5 """Runs a Swarming task.
6 6
7 Downloads all the necessary files to run the task, executes the command and 7 Downloads all the necessary files to run the task, executes the command and
8 streams results back to the Swarming server. 8 streams results back to the Swarming server.
9 9
10 The process exit code is 0 when the task was executed, even if the task itself 10 The process exit code is 0 when the task was executed, even if the task itself
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
76 76
77 77
78 def get_run_isolated(): 78 def get_run_isolated():
79 """Returns the path to itself to run run_isolated. 79 """Returns the path to itself to run run_isolated.
80 80
81 Mocked in test to point to the real run_isolated.py script. 81 Mocked in test to point to the real run_isolated.py script.
82 """ 82 """
83 return [sys.executable, THIS_FILE, 'run_isolated'] 83 return [sys.executable, THIS_FILE, 'run_isolated']
84 84
85 85
86 def get_isolated_cmd( 86 def get_isolated_args(
87 work_dir, task_details, isolated_result, bot_file, min_free_space): 87 work_dir, task_details, isolated_result, bot_file, min_free_space):
88 """Returns the command to call run_isolated. Mocked in tests.""" 88 """Returns the command to call run_isolated. Mocked in tests."""
89 assert (bool(task_details.command) != 89 assert (bool(task_details.command) !=
90 bool(task_details.isolated and task_details.isolated.get('input'))) 90 bool(task_details.isolated and task_details.isolated.get('input')))
91 bot_dir = os.path.dirname(work_dir) 91 bot_dir = os.path.dirname(work_dir)
92 if os.path.isfile(isolated_result): 92 if os.path.isfile(isolated_result):
93 os.remove(isolated_result) 93 os.remove(isolated_result)
94 cmd = get_run_isolated() 94 cmd = []
95 95
96 if task_details.isolated: 96 if task_details.isolated:
97 cmd.extend( 97 cmd.extend(
98 [ 98 [
99 '-I', task_details.isolated['server'].encode('utf-8'), 99 '-I', task_details.isolated['server'].encode('utf-8'),
100 '--namespace', task_details.isolated['namespace'].encode('utf-8'), 100 '--namespace', task_details.isolated['namespace'].encode('utf-8'),
101 ]) 101 ])
102 isolated_input = task_details.isolated.get('input') 102 isolated_input = task_details.isolated.get('input')
103 if isolated_input: 103 if isolated_input:
104 cmd.extend( 104 cmd.extend(
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
336 try: 336 try:
337 proc.wait(grace_period) 337 proc.wait(grace_period)
338 except subprocess42.TimeoutError: 338 except subprocess42.TimeoutError:
339 logging.warning('SIGKILL finally due to %s', reason) 339 logging.warning('SIGKILL finally due to %s', reason)
340 proc.kill() 340 proc.kill()
341 exit_code = proc.wait() 341 exit_code = proc.wait()
342 logging.info('Waiting for process exit in finally - done') 342 logging.info('Waiting for process exit in finally - done')
343 return exit_code 343 return exit_code
344 344
345 345
346 def fail_without_command(remote, bot_id, task_id, params, cost_usd_hour,
347 task_start, exit_code, stdout):
348 now = monotonic_time()
349 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
350 params['duration'] = now - task_start
351 params['io_timeout'] = False
352 params['hard_timeout'] = False
353 # Ignore server reply to stop.
354 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)
355 return {
356 u'exit_code': exit_code,
357 u'hard_timeout': False,
358 u'io_timeout': False,
359 u'must_signal_internal_failure': None,
360 u'version': OUT_VERSION,
361 }
362
363
346 def run_command(remote, task_details, work_dir, cost_usd_hour, 364 def run_command(remote, task_details, work_dir, cost_usd_hour,
347 task_start, min_free_space, bot_file, extra_env): 365 task_start, min_free_space, bot_file, extra_env):
348 """Runs a command and sends packets to the server to stream results back. 366 """Runs a command and sends packets to the server to stream results back.
349 367
350 Implements both I/O and hard timeouts. Sends the packets numbered, so the 368 Implements both I/O and hard timeouts. Sends the packets numbered, so the
351 server can ensure they are processed in order. 369 server can ensure they are processed in order.
352 370
353 Returns: 371 Returns:
354 Metadata dict with the execution result. 372 Metadata dict with the execution result.
355 373
(...skipping 14 matching lines...) Expand all
370 # Don't even bother, the task was already canceled. 388 # Don't even bother, the task was already canceled.
371 return { 389 return {
372 u'exit_code': -1, 390 u'exit_code': -1,
373 u'hard_timeout': False, 391 u'hard_timeout': False,
374 u'io_timeout': False, 392 u'io_timeout': False,
375 u'must_signal_internal_failure': None, 393 u'must_signal_internal_failure': None,
376 u'version': OUT_VERSION, 394 u'version': OUT_VERSION,
377 } 395 }
378 396
379 isolated_result = os.path.join(work_dir, 'isolated_result.json') 397 isolated_result = os.path.join(work_dir, 'isolated_result.json')
380 cmd = get_isolated_cmd( 398 args_path = os.path.join(work_dir, 'run_isolated_args.json')
399 cmd = get_run_isolated()
400 cmd.extend(['-f', args_path])
401 args = get_isolated_args(
381 work_dir, task_details, isolated_result, bot_file, min_free_space) 402 work_dir, task_details, isolated_result, bot_file, min_free_space)
382 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to 403 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to
383 # give one 'grace_period' slot to the child process and one slot to upload 404 # give one 'grace_period' slot to the child process and one slot to upload
384 # the results back. 405 # the results back.
385 task_details.hard_timeout = 0 406 task_details.hard_timeout = 0
386 if task_details.grace_period: 407 if task_details.grace_period:
387 task_details.grace_period *= 2 408 task_details.grace_period *= 2
388 409
389 try: 410 try:
390 # TODO(maruel): Support both channels independently and display stderr in 411 # TODO(maruel): Support both channels independently and display stderr in
391 # red. 412 # red.
392 env = os.environ.copy() 413 env = os.environ.copy()
393 for env_to_add in (task_details.env, extra_env): 414 for env_to_add in (task_details.env, extra_env):
394 for key, value in (env_to_add or {}).iteritems(): 415 for key, value in (env_to_add or {}).iteritems():
395 if not value: 416 if not value:
396 env.pop(key, None) 417 env.pop(key, None)
397 else: 418 else:
398 env[key] = value 419 env[key] = value
399 logging.info('cmd=%s', cmd) 420 logging.info('cmd=%s', cmd)
400 logging.info('cwd=%s', work_dir) 421 logging.info('cwd=%s', work_dir)
401 logging.info('env=%s', env) 422 logging.info('env=%s', env)
423 fail_on_start = lambda exit_code, stdout: fail_without_command(remote,
M-A Ruel 2016/10/21 20:15:19 Wrap `remote` onto the next line so that all the arguments are aligned.
aludwin 2016/10/21 20:31:59 Done.
424 bot_id, task_id, params, cost_usd_hour, task_start, exit_code, stdout)
425
426 # We write args to a file since there may be more of them than the OS
427 # can handle.
428 try:
429 args_file = open(args_path, "w")
430 json.dump(args, args_file)
431 args_file.close()
432 except (OSError, IOError) as e:
433 return fail_on_start(-1,
434 "Could not write args to %s: %s" % (args_path, e))
M-A Ruel 2016/10/21 20:15:19 Use single quotes, and align all the args the same way.
aludwin 2016/10/21 20:32:00 Done.
435
436 # Start the command
402 try: 437 try:
403 assert cmd and all(isinstance(a, basestring) for a in cmd) 438 assert cmd and all(isinstance(a, basestring) for a in cmd)
404 proc = subprocess42.Popen( 439 proc = subprocess42.Popen(
405 cmd, 440 cmd,
406 env=env, 441 env=env,
407 cwd=work_dir, 442 cwd=work_dir,
408 detached=True, 443 detached=True,
409 stdout=subprocess42.PIPE, 444 stdout=subprocess42.PIPE,
410 stderr=subprocess42.STDOUT, 445 stderr=subprocess42.STDOUT,
411 stdin=subprocess42.PIPE) 446 stdin=subprocess42.PIPE)
412 except OSError as e: 447 except OSError as e:
413 stdout = 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e) 448 return fail_on_start(1,
414 now = monotonic_time() 449 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e))
415 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
416 params['duration'] = now - start
417 params['io_timeout'] = False
418 params['hard_timeout'] = False
419 # Ignore server reply to stop.
420 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)
421 return {
422 u'exit_code': 1,
423 u'hard_timeout': False,
424 u'io_timeout': False,
425 u'must_signal_internal_failure': None,
426 u'version': OUT_VERSION,
427 }
428 450
451 # Monitor the task
429 output_chunk_start = 0 452 output_chunk_start = 0
430 stdout = '' 453 stdout = ''
431 exit_code = None 454 exit_code = None
432 had_io_timeout = False 455 had_io_timeout = False
433 must_signal_internal_failure = None 456 must_signal_internal_failure = None
434 kill_sent = False 457 kill_sent = False
435 timed_out = None 458 timed_out = None
436 try: 459 try:
437 calc = lambda: calc_yield_wait( 460 calc = lambda: calc_yield_wait(
438 task_details, start, last_io, timed_out, stdout) 461 task_details, start, last_io, timed_out, stdout)
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
625 options.start = now 648 options.start = now
626 649
627 try: 650 try:
628 load_and_run( 651 load_and_run(
629 options.in_file, options.swarming_server, options.cost_usd_hour, 652 options.in_file, options.swarming_server, options.cost_usd_hour,
630 options.start, options.out_file, options.min_free_space, 653 options.start, options.out_file, options.min_free_space,
631 options.bot_file, options.auth_params_file) 654 options.bot_file, options.auth_params_file)
632 return 0 655 return 0
633 finally: 656 finally:
634 logging.info('quitting') 657 logging.info('quitting')
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698