| OLD | NEW |
| 1 # Copyright 2013 The Swarming Authors. All rights reserved. | 1 # Copyright 2013 The Swarming Authors. All rights reserved. |
| 2 # Use of this source code is governed by the Apache v2.0 license that can be | 2 # Use of this source code is governed by the Apache v2.0 license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Runs a Swarming task. | 5 """Runs a Swarming task. |
| 6 | 6 |
| 7 Downloads all the necessary files to run the task, executes the command and | 7 Downloads all the necessary files to run the task, executes the command and |
| 8 streams results back to the Swarming server. | 8 streams results back to the Swarming server. |
| 9 | 9 |
| 10 The process exit code is 0 when the task was executed, even if the task itself | 10 The process exit code is 0 when the task was executed, even if the task itself |
| (...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 338 u'must_signal_internal_failure': None, | 338 u'must_signal_internal_failure': None, |
| 339 u'version': OUT_VERSION, | 339 u'version': OUT_VERSION, |
| 340 } | 340 } |
| 341 | 341 |
| 342 output_chunk_start = 0 | 342 output_chunk_start = 0 |
| 343 stdout = '' | 343 stdout = '' |
| 344 exit_code = None | 344 exit_code = None |
| 345 had_hard_timeout = False | 345 had_hard_timeout = False |
| 346 had_io_timeout = False | 346 had_io_timeout = False |
| 347 must_signal_internal_failure = None | 347 must_signal_internal_failure = None |
| 348 kill_sent = False |
| 348 timed_out = None | 349 timed_out = None |
| 349 try: | 350 try: |
| 350 calc = lambda: calc_yield_wait( | 351 calc = lambda: calc_yield_wait( |
| 351 task_details, start, last_io, timed_out, stdout) | 352 task_details, start, last_io, timed_out, stdout) |
| 352 maxsize = lambda: MAX_CHUNK_SIZE - len(stdout) | 353 maxsize = lambda: MAX_CHUNK_SIZE - len(stdout) |
| 353 last_io = monotonic_time() | 354 last_io = monotonic_time() |
| 354 for _, new_data in proc.yield_any(maxsize=maxsize, soft_timeout=calc): | 355 for _, new_data in proc.yield_any(maxsize=maxsize, soft_timeout=calc): |
| 355 now = monotonic_time() | 356 now = monotonic_time() |
| 356 if new_data: | 357 if new_data: |
| 357 stdout += new_data | 358 stdout += new_data |
| 358 last_io = now | 359 last_io = now |
| 359 | 360 |
| 360 # Post update if necessary. | 361 # Post update if necessary. |
| 361 if should_post_update(stdout, now, last_packet): | 362 if should_post_update(stdout, now, last_packet): |
| 362 last_packet = monotonic_time() | 363 last_packet = monotonic_time() |
| 363 params['cost_usd'] = ( | 364 params['cost_usd'] = ( |
| 364 cost_usd_hour * (last_packet - task_start) / 60. / 60.) | 365 cost_usd_hour * (last_packet - task_start) / 60. / 60.) |
| 365 post_update(swarming_server, params, None, stdout, output_chunk_start) | 366 post_update(swarming_server, params, None, stdout, output_chunk_start) |
| 366 output_chunk_start += len(stdout) | 367 output_chunk_start += len(stdout) |
| 367 stdout = '' | 368 stdout = '' |
| 368 | 369 |
| 369 # Send signal on timeout if necessary. Both are failures, not | 370 # Send signal on timeout if necessary. Both are failures, not |
| 370 # internal_failures. | 371 # internal_failures. |
| 371 # Eventually kill but return 0 so bot_main.py doesn't cancel the task. | 372 # Eventually kill but return 0 so bot_main.py doesn't cancel the task. |
| 372 if not timed_out: | 373 if not timed_out: |
| 373 if now - last_io > task_details.io_timeout: | 374 if now - last_io > task_details.io_timeout: |
| 374 had_io_timeout = True | 375 had_io_timeout = True |
| 375 logging.warning('I/O timeout') | 376 logging.warning('I/O timeout; sending SIGTERM') |
| 376 try: | 377 proc.terminate() |
| 377 proc.terminate() | |
| 378 except OSError: | |
| 379 pass | |
| 380 timed_out = monotonic_time() | 378 timed_out = monotonic_time() |
| 381 elif now - start > task_details.hard_timeout: | 379 elif now - start > task_details.hard_timeout: |
| 382 had_hard_timeout = True | 380 had_hard_timeout = True |
| 383 logging.warning('Hard timeout') | 381 logging.warning('Hard timeout; sending SIGTERM') |
| 384 try: | 382 proc.terminate() |
| 385 proc.terminate() | |
| 386 except OSError: | |
| 387 pass | |
| 388 timed_out = monotonic_time() | 383 timed_out = monotonic_time() |
| 389 else: | 384 else: |
| 390 # During grace period. | 385 # During grace period. |
| 391 if now >= timed_out + task_details.grace_period: | 386 if not kill_sent and now >= timed_out + task_details.grace_period: |
| 392 # Now kill for real. The user can distinguish between the following | 387 # Now kill for real. The user can distinguish between the following |
| 393 # states: | 388 # states: |
| 394 # - signal but process exited within grace period, | 389 # - signal but process exited within grace period, |
| 395 # (hard_|io_)_timed_out will be set but the process exit code will | 390 # (hard_|io_)_timed_out will be set but the process exit code will |
| 396 # be script provided. | 391 # be script provided. |
| 397 # - process exited late, exit code will be -9 on posix. | 392 # - process exited late, exit code will be -9 on posix. |
| 398 try: | 393 logging.warning('Grace exhausted; sending SIGKILL') |
| 399 logging.warning('proc.kill() after grace') | 394 proc.kill() |
| 400 proc.kill() | 395 kill_sent = True |
| 401 except OSError: | |
| 402 pass | |
| 403 logging.info('Waiting for proces exit') | 396 logging.info('Waiting for proces exit') |
| 404 exit_code = proc.wait() | 397 exit_code = proc.wait() |
| 405 except MustExit as e: | 398 except MustExit as e: |
| 406 # TODO(maruel): Send SIGTERM to the child process and give it | 399 # TODO(maruel): Send SIGTERM to the child process and give it |
| 407 # task_details.grace_period to terminate. | 400 # task_details.grace_period to terminate. |
| 408 must_signal_internal_failure = ( | 401 must_signal_internal_failure = ( |
| 409 u'task_runner received signal %s' % e.signal) | 402 u'task_runner received signal %s' % e.signal) |
| 410 exit_code = kill_and_wait(proc, 'signal %d' % e.signal) | 403 exit_code = kill_and_wait(proc, 'signal %d' % e.signal) |
| 411 except (IOError, OSError): | 404 except (IOError, OSError): |
| 412 # Something wrong happened, try to kill the child process. | 405 # Something wrong happened, try to kill the child process. |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 484 if options.start > now: | 477 if options.start > now: |
| 485 options.start = now | 478 options.start = now |
| 486 | 479 |
| 487 try: | 480 try: |
| 488 load_and_run( | 481 load_and_run( |
| 489 options.in_file, remote, options.cost_usd_hour, options.start, | 482 options.in_file, remote, options.cost_usd_hour, options.start, |
| 490 options.out_file) | 483 options.out_file) |
| 491 return 0 | 484 return 0 |
| 492 finally: | 485 finally: |
| 493 logging.info('quitting') | 486 logging.info('quitting') |
| OLD | NEW |