Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2013 The LUCI Authors. All rights reserved. | 1 # Copyright 2013 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
| 4 | 4 |
| 5 """Runs a Swarming task. | 5 """Runs a Swarming task. |
| 6 | 6 |
| 7 Downloads all the necessary files to run the task, executes the command and | 7 Downloads all the necessary files to run the task, executes the command and |
| 8 streams results back to the Swarming server. | 8 streams results back to the Swarming server. |
| 9 | 9 |
| 10 The process exit code is 0 when the task was executed, even if the task itself | 10 The process exit code is 0 when the task was executed, even if the task itself |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 76 | 76 |
| 77 | 77 |
| 78 def get_run_isolated(): | 78 def get_run_isolated(): |
| 79 """Returns the path to itself to run run_isolated. | 79 """Returns the path to itself to run run_isolated. |
| 80 | 80 |
| 81 Mocked in test to point to the real run_isolated.py script. | 81 Mocked in test to point to the real run_isolated.py script. |
| 82 """ | 82 """ |
| 83 return [sys.executable, THIS_FILE, 'run_isolated'] | 83 return [sys.executable, THIS_FILE, 'run_isolated'] |
| 84 | 84 |
| 85 | 85 |
| 86 def get_isolated_cmd( | 86 def get_isolated_args( |
| 87 work_dir, task_details, isolated_result, bot_file, min_free_space): | 87 work_dir, task_details, isolated_result, bot_file, min_free_space): |
| 88 """Returns the command to call run_isolated. Mocked in tests.""" | 88 """Returns the command to call run_isolated. Mocked in tests.""" |
| 89 assert (bool(task_details.command) != | 89 assert (bool(task_details.command) != |
| 90 bool(task_details.isolated and task_details.isolated.get('input'))) | 90 bool(task_details.isolated and task_details.isolated.get('input'))) |
| 91 bot_dir = os.path.dirname(work_dir) | 91 bot_dir = os.path.dirname(work_dir) |
| 92 if os.path.isfile(isolated_result): | 92 if os.path.isfile(isolated_result): |
| 93 os.remove(isolated_result) | 93 os.remove(isolated_result) |
| 94 cmd = get_run_isolated() | 94 cmd = [] |
| 95 | 95 |
| 96 if task_details.isolated: | 96 if task_details.isolated: |
| 97 cmd.extend( | 97 cmd.extend( |
| 98 [ | 98 [ |
| 99 '-I', task_details.isolated['server'].encode('utf-8'), | 99 '-I', task_details.isolated['server'].encode('utf-8'), |
| 100 '--namespace', task_details.isolated['namespace'].encode('utf-8'), | 100 '--namespace', task_details.isolated['namespace'].encode('utf-8'), |
| 101 ]) | 101 ]) |
| 102 isolated_input = task_details.isolated.get('input') | 102 isolated_input = task_details.isolated.get('input') |
| 103 if isolated_input: | 103 if isolated_input: |
| 104 cmd.extend( | 104 cmd.extend( |
| (...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 336 try: | 336 try: |
| 337 proc.wait(grace_period) | 337 proc.wait(grace_period) |
| 338 except subprocess42.TimeoutError: | 338 except subprocess42.TimeoutError: |
| 339 logging.warning('SIGKILL finally due to %s', reason) | 339 logging.warning('SIGKILL finally due to %s', reason) |
| 340 proc.kill() | 340 proc.kill() |
| 341 exit_code = proc.wait() | 341 exit_code = proc.wait() |
| 342 logging.info('Waiting for process exit in finally - done') | 342 logging.info('Waiting for process exit in finally - done') |
| 343 return exit_code | 343 return exit_code |
| 344 | 344 |
| 345 | 345 |
| 346 def fail_without_command(remote, bot_id, task_id, params, cost_usd_hour, | |
| 347 task_start, exit_code, stdout): | |
| 348 now = monotonic_time() | |
| 349 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60. | |
| 350 params['duration'] = now - task_start | |
| 351 params['io_timeout'] = False | |
| 352 params['hard_timeout'] = False | |
| 353 # Ignore server reply to stop. | |
| 354 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1) | |
| 355 return { | |
| 356 u'exit_code': exit_code, | |
| 357 u'hard_timeout': False, | |
| 358 u'io_timeout': False, | |
| 359 u'must_signal_internal_failure': None, | |
| 360 u'version': OUT_VERSION, | |
| 361 } | |
| 362 | |
| 363 | |
| 346 def run_command(remote, task_details, work_dir, cost_usd_hour, | 364 def run_command(remote, task_details, work_dir, cost_usd_hour, |
| 347 task_start, min_free_space, bot_file, extra_env): | 365 task_start, min_free_space, bot_file, extra_env): |
| 348 """Runs a command and sends packets to the server to stream results back. | 366 """Runs a command and sends packets to the server to stream results back. |
| 349 | 367 |
| 350 Implements both I/O and hard timeouts. Sends the packets numbered, so the | 368 Implements both I/O and hard timeouts. Sends the packets numbered, so the |
| 351 server can ensure they are processed in order. | 369 server can ensure they are processed in order. |
| 352 | 370 |
| 353 Returns: | 371 Returns: |
| 354 Metadata dict with the execution result. | 372 Metadata dict with the execution result. |
| 355 | 373 |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 370 # Don't even bother, the task was already canceled. | 388 # Don't even bother, the task was already canceled. |
| 371 return { | 389 return { |
| 372 u'exit_code': -1, | 390 u'exit_code': -1, |
| 373 u'hard_timeout': False, | 391 u'hard_timeout': False, |
| 374 u'io_timeout': False, | 392 u'io_timeout': False, |
| 375 u'must_signal_internal_failure': None, | 393 u'must_signal_internal_failure': None, |
| 376 u'version': OUT_VERSION, | 394 u'version': OUT_VERSION, |
| 377 } | 395 } |
| 378 | 396 |
| 379 isolated_result = os.path.join(work_dir, 'isolated_result.json') | 397 isolated_result = os.path.join(work_dir, 'isolated_result.json') |
| 380 cmd = get_isolated_cmd( | 398 args_path = os.path.join(work_dir, 'run_isolated_args.json') |
| 399 cmd = get_run_isolated() | |
| 400 cmd.extend(['-f', args_path]) | |
| 401 args = get_isolated_args( | |
| 381 work_dir, task_details, isolated_result, bot_file, min_free_space) | 402 work_dir, task_details, isolated_result, bot_file, min_free_space) |
| 382 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to | 403 # Hard timeout enforcement is deferred to run_isolated. Grace is doubled to |
| 383 # give one 'grace_period' slot to the child process and one slot to upload | 404 # give one 'grace_period' slot to the child process and one slot to upload |
| 384 # the results back. | 405 # the results back. |
| 385 task_details.hard_timeout = 0 | 406 task_details.hard_timeout = 0 |
| 386 if task_details.grace_period: | 407 if task_details.grace_period: |
| 387 task_details.grace_period *= 2 | 408 task_details.grace_period *= 2 |
| 388 | 409 |
| 389 try: | 410 try: |
| 390 # TODO(maruel): Support both channels independently and display stderr in | 411 # TODO(maruel): Support both channels independently and display stderr in |
| 391 # red. | 412 # red. |
| 392 env = os.environ.copy() | 413 env = os.environ.copy() |
| 393 for env_to_add in (task_details.env, extra_env): | 414 for env_to_add in (task_details.env, extra_env): |
| 394 for key, value in (env_to_add or {}).iteritems(): | 415 for key, value in (env_to_add or {}).iteritems(): |
| 395 if not value: | 416 if not value: |
| 396 env.pop(key, None) | 417 env.pop(key, None) |
| 397 else: | 418 else: |
| 398 env[key] = value | 419 env[key] = value |
| 399 logging.info('cmd=%s', cmd) | 420 logging.info('cmd=%s', cmd) |
| 400 logging.info('cwd=%s', work_dir) | 421 logging.info('cwd=%s', work_dir) |
| 401 logging.info('env=%s', env) | 422 logging.info('env=%s', env) |
| 423 fail_on_start = lambda exit_code, stdout: fail_without_command( | |
| 424 remote, bot_id, task_id, params, cost_usd_hour, task_start, | |
| 425 exit_code, stdout) | |
| 426 | |
| 427 # We write args to a file since there may be more of them than the OS | |
| 428 # can handle. | |
| 429 try: | |
| 430 args_file = open(args_path, "w") | |
|
M-A Ruel
2016/10/24 16:26:52
with open(...
aludwin
2016/10/24 17:05:48
Do I put that inside a try-catch?
M-A Ruel
2016/10/24 17:11:53
yes, so it looks like:
try:
with open(args_path
| |
| 431 json.dump(args, args_file) | |
| 432 args_file.close() | |
| 433 except (OSError, IOError) as e: | |
| 434 return fail_on_start( | |
| 435 -1, | |
| 436 'Could not write args to %s: %s' % (args_path, e)) | |
| 437 | |
| 438 # Start the command | |
| 402 try: | 439 try: |
| 403 assert cmd and all(isinstance(a, basestring) for a in cmd) | 440 assert cmd and all(isinstance(a, basestring) for a in cmd) |
| 404 proc = subprocess42.Popen( | 441 proc = subprocess42.Popen( |
| 405 cmd, | 442 cmd, |
| 406 env=env, | 443 env=env, |
| 407 cwd=work_dir, | 444 cwd=work_dir, |
| 408 detached=True, | 445 detached=True, |
| 409 stdout=subprocess42.PIPE, | 446 stdout=subprocess42.PIPE, |
| 410 stderr=subprocess42.STDOUT, | 447 stderr=subprocess42.STDOUT, |
| 411 stdin=subprocess42.PIPE) | 448 stdin=subprocess42.PIPE) |
| 412 except OSError as e: | 449 except OSError as e: |
| 413 stdout = 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e) | 450 return fail_on_start( |
| 414 now = monotonic_time() | 451 1, |
| 415 params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60. | 452 'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e)) |
| 416 params['duration'] = now - start | |
| 417 params['io_timeout'] = False | |
| 418 params['hard_timeout'] = False | |
| 419 # Ignore server reply to stop. | |
| 420 remote.post_task_update(task_id, bot_id, params, (stdout, 0), 1) | |
| 421 return { | |
| 422 u'exit_code': 1, | |
| 423 u'hard_timeout': False, | |
| 424 u'io_timeout': False, | |
| 425 u'must_signal_internal_failure': None, | |
| 426 u'version': OUT_VERSION, | |
| 427 } | |
| 428 | 453 |
| 454 # Monitor the task | |
| 429 output_chunk_start = 0 | 455 output_chunk_start = 0 |
| 430 stdout = '' | 456 stdout = '' |
| 431 exit_code = None | 457 exit_code = None |
| 432 had_io_timeout = False | 458 had_io_timeout = False |
| 433 must_signal_internal_failure = None | 459 must_signal_internal_failure = None |
| 434 kill_sent = False | 460 kill_sent = False |
| 435 timed_out = None | 461 timed_out = None |
| 436 try: | 462 try: |
| 437 calc = lambda: calc_yield_wait( | 463 calc = lambda: calc_yield_wait( |
| 438 task_details, start, last_io, timed_out, stdout) | 464 task_details, start, last_io, timed_out, stdout) |
| (...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 625 options.start = now | 651 options.start = now |
| 626 | 652 |
| 627 try: | 653 try: |
| 628 load_and_run( | 654 load_and_run( |
| 629 options.in_file, options.swarming_server, options.cost_usd_hour, | 655 options.in_file, options.swarming_server, options.cost_usd_hour, |
| 630 options.start, options.out_file, options.min_free_space, | 656 options.start, options.out_file, options.min_free_space, |
| 631 options.bot_file, options.auth_params_file) | 657 options.bot_file, options.auth_params_file) |
| 632 return 0 | 658 return 0 |
| 633 finally: | 659 finally: |
| 634 logging.info('quitting') | 660 logging.info('quitting') |
| OLD | NEW |