Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1339)

Side by Side Diff: appengine/swarming/swarming_bot/bot_code/task_runner.py

Issue 2443663002: Pass args in file from task_runner to run_isolated (Closed)
Patch Set: Respond to code review comments other than changes to options processing Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2013 The LUCI Authors. All rights reserved. 1 # Copyright 2013 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Runs a Swarming task. 5 """Runs a Swarming task.
6 6
7 Downloads all the necessary files to run the task, executes the command and 7 Downloads all the necessary files to run the task, executes the command and
8 streams results back to the Swarming server. 8 streams results back to the Swarming server.
9 9
10 The process exit code is 0 when the task was executed, even if the task itself 10 The process exit code is 0 when the task was executed, even if the task itself
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
76 76
77 77
78 def get_run_isolated(): 78 def get_run_isolated():
79 """Returns the path to itself to run run_isolated. 79 """Returns the path to itself to run run_isolated.
80 80
81 Mocked in test to point to the real run_isolated.py script. 81 Mocked in test to point to the real run_isolated.py script.
82 """ 82 """
83 return [sys.executable, THIS_FILE, 'run_isolated'] 83 return [sys.executable, THIS_FILE, 'run_isolated']
84 84
85 85
86 def get_isolated_cmd( 86 def get_isolated_args(
87 work_dir, task_details, isolated_result, bot_file, min_free_space): 87 work_dir, task_details, isolated_result, bot_file, min_free_space):
88 """Returns the command to call run_isolated. Mocked in tests.""" 88 """Returns the command to call run_isolated. Mocked in tests."""
89 assert (bool(task_details.command) != 89 assert (bool(task_details.command) !=
90 bool(task_details.isolated and task_details.isolated.get('input'))) 90 bool(task_details.isolated and task_details.isolated.get('input')))
91 bot_dir = os.path.dirname(work_dir) 91 bot_dir = os.path.dirname(work_dir)
92 if os.path.isfile(isolated_result): 92 if os.path.isfile(isolated_result):
93 os.remove(isolated_result) 93 os.remove(isolated_result)
94 cmd = get_run_isolated() 94 cmd = []
95 95
96 if task_details.isolated: 96 if task_details.isolated:
97 cmd.extend( 97 cmd.extend(
98 [ 98 [
99 '-I', task_details.isolated['server'].encode('utf-8'), 99 '-I', task_details.isolated['server'].encode('utf-8'),
100 '--namespace', task_details.isolated['namespace'].encode('utf-8'), 100 '--namespace', task_details.isolated['namespace'].encode('utf-8'),
101 ]) 101 ])
102 isolated_input = task_details.isolated.get('input') 102 isolated_input = task_details.isolated.get('input')
103 if isolated_input: 103 if isolated_input:
104 cmd.extend( 104 cmd.extend(
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
336 try: 336 try:
337 proc.wait(grace_period) 337 proc.wait(grace_period)
338 except subprocess42.TimeoutError: 338 except subprocess42.TimeoutError:
339 logging.warning('SIGKILL finally due to %s', reason) 339 logging.warning('SIGKILL finally due to %s', reason)
340 proc.kill() 340 proc.kill()
341 exit_code = proc.wait() 341 exit_code = proc.wait()
342 logging.info('Waiting for process exit in finally - done') 342 logging.info('Waiting for process exit in finally - done')
343 return exit_code 343 return exit_code
344 344
345 345
346 def fail_without_command(remote, bot_id, task_id, params, cost_usd_hour,
347 task_start, exit_code, stdout):
348 now = monotonic_time()
349 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
350 params['duration'] = now - task_start
351 params['io_timeout'] = False
352 params['hard_timeout'] = False
353 # Ignore server reply to stop.
354 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)
355 return {
356 u'exit_code': exit_code,
357 u'hard_timeout': False,
358 u'io_timeout': False,
359 u'must_signal_internal_failure': None,
360 u'version': OUT_VERSION,
361 }
362
363
346 def run_command(remote, task_details, work_dir, cost_usd_hour, 364 def run_command(remote, task_details, work_dir, cost_usd_hour,
347 task_start, min_free_space, bot_file, extra_env): 365 task_start, min_free_space, bot_file, extra_env):
348 """Runs a command and sends packets to the server to stream results back. 366 """Runs a command and sends packets to the server to stream results back.
349 367
350 Implements both I/O and hard timeouts. Sends the packets numbered, so the 368 Implements both I/O and hard timeouts. Sends the packets numbered, so the
351 server can ensure they are processed in order. 369 server can ensure they are processed in order.
352 370
353 Returns: 371 Returns:
354 Metadata dict with the execution result. 372 Metadata dict with the execution result.
355 373
(...skipping 14 matching lines...) Expand all
370 # Don't even bother, the task was already canceled. 388 # Don't even bother, the task was already canceled.
371 return { 389 return {
372 u'exit_code': -1, 390 u'exit_code': -1,
373 u'hard_timeout': False, 391 u'hard_timeout': False,
374 u'io_timeout': False, 392 u'io_timeout': False,
375 u'must_signal_internal_failure': None, 393 u'must_signal_internal_failure': None,
376 u'version': OUT_VERSION, 394 u'version': OUT_VERSION,
377 } 395 }
378 396
379 isolated_result = os.path.join(work_dir, 'isolated_result.json') 397 isolated_result = os.path.join(work_dir, 'isolated_result.json')
380 cmd = get_isolated_cmd( 398 args_path = os.path.join(work_dir, 'run_isolated_args.json')
399 cmd = get_run_isolated()
400 cmd.extend(['-f', args_path])
401 args = get_isolated_args(
381 work_dir, task_details, isolated_result, bot_file, min_free_space) 402 work_dir, task_details, isolated_result, bot_file, min_free_space)
382 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to 403 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to
383 # give one 'grace_period' slot to the child process and one slot to upload 404 # give one 'grace_period' slot to the child process and one slot to upload
384 # the results back. 405 # the results back.
385 task_details.hard_timeout = 0 406 task_details.hard_timeout = 0
386 if task_details.grace_period: 407 if task_details.grace_period:
387 task_details.grace_period *= 2 408 task_details.grace_period *= 2
388 409
389 try: 410 try:
390 # TODO(maruel): Support both channels independently and display stderr in 411 # TODO(maruel): Support both channels independently and display stderr in
391 # red. 412 # red.
392 env = os.environ.copy() 413 env = os.environ.copy()
393 for env_to_add in (task_details.env, extra_env): 414 for env_to_add in (task_details.env, extra_env):
394 for key, value in (env_to_add or {}).iteritems(): 415 for key, value in (env_to_add or {}).iteritems():
395 if not value: 416 if not value:
396 env.pop(key, None) 417 env.pop(key, None)
397 else: 418 else:
398 env[key] = value 419 env[key] = value
399 logging.info('cmd=%s', cmd) 420 logging.info('cmd=%s', cmd)
400 logging.info('cwd=%s', work_dir) 421 logging.info('cwd=%s', work_dir)
401 logging.info('env=%s', env) 422 logging.info('env=%s', env)
423 fail_on_start = lambda exit_code, stdout: fail_without_command(
424 remote, bot_id, task_id, params, cost_usd_hour, task_start,
425 exit_code, stdout)
426
427 # We write args to a file since there may be more of them than the OS
428 # can handle.
429 try:
430 args_file = open(args_path, "w")
M-A Ruel 2016/10/24 16:26:52 with open(...
aludwin 2016/10/24 17:05:48 Do I put that inside a try-catch?
M-A Ruel 2016/10/24 17:11:53 yes, so it looks like: try: with open(args_path
431 json.dump(args, args_file)
432 args_file.close()
433 except (OSError, IOError) as e:
434 return fail_on_start(
435 -1,
436 'Could not write args to %s: %s' % (args_path, e))
437
438 # Start the command
402 try: 439 try:
403 assert cmd and all(isinstance(a, basestring) for a in cmd) 440 assert cmd and all(isinstance(a, basestring) for a in cmd)
404 proc = subprocess42.Popen( 441 proc = subprocess42.Popen(
405 cmd, 442 cmd,
406 env=env, 443 env=env,
407 cwd=work_dir, 444 cwd=work_dir,
408 detached=True, 445 detached=True,
409 stdout=subprocess42.PIPE, 446 stdout=subprocess42.PIPE,
410 stderr=subprocess42.STDOUT, 447 stderr=subprocess42.STDOUT,
411 stdin=subprocess42.PIPE) 448 stdin=subprocess42.PIPE)
412 except OSError as e: 449 except OSError as e:
413 stdout = 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e) 450 return fail_on_start(
414 now = monotonic_time() 451 1,
415 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60. 452 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e))
416 params['duration'] = now - start
417 params['io_timeout'] = False
418 params['hard_timeout'] = False
419 # Ignore server reply to stop.
420 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1)
421 return {
422 u'exit_code': 1,
423 u'hard_timeout': False,
424 u'io_timeout': False,
425 u'must_signal_internal_failure': None,
426 u'version': OUT_VERSION,
427 }
428 453
454 # Monitor the task
429 output_chunk_start = 0 455 output_chunk_start = 0
430 stdout = '' 456 stdout = ''
431 exit_code = None 457 exit_code = None
432 had_io_timeout = False 458 had_io_timeout = False
433 must_signal_internal_failure = None 459 must_signal_internal_failure = None
434 kill_sent = False 460 kill_sent = False
435 timed_out = None 461 timed_out = None
436 try: 462 try:
437 calc = lambda: calc_yield_wait( 463 calc = lambda: calc_yield_wait(
438 task_details, start, last_io, timed_out, stdout) 464 task_details, start, last_io, timed_out, stdout)
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
625 options.start = now 651 options.start = now
626 652
627 try: 653 try:
628 load_and_run( 654 load_and_run(
629 options.in_file, options.swarming_server, options.cost_usd_hour, 655 options.in_file, options.swarming_server, options.cost_usd_hour,
630 options.start, options.out_file, options.min_free_space, 656 options.start, options.out_file, options.min_free_space,
631 options.bot_file, options.auth_params_file) 657 options.bot_file, options.auth_params_file)
632 return 0 658 return 0
633 finally: 659 finally:
634 logging.info('quitting') 660 logging.info('quitting')
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698