OLD | NEW |
1 # Copyright 2013 The Swarming Authors. All rights reserved. | 1 # Copyright 2013 The Swarming Authors. All rights reserved. |
2 # Use of this source code is governed by the Apache v2.0 license that can be | 2 # Use of this source code is governed by the Apache v2.0 license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """Runs a Swarming task. | 5 """Runs a Swarming task. |
6 | 6 |
7 Downloads all the necessary files to run the task, executes the command and | 7 Downloads all the necessary files to run the task, executes the command and |
8 streams results back to the Swarming server. | 8 streams results back to the Swarming server. |
9 | 9 |
10 The process exit code is 0 when the task was executed, even if the task itself | 10 The process exit code is 0 when the task was executed, even if the task itself |
(...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
338 u'must_signal_internal_failure': None, | 338 u'must_signal_internal_failure': None, |
339 u'version': OUT_VERSION, | 339 u'version': OUT_VERSION, |
340 } | 340 } |
341 | 341 |
342 output_chunk_start = 0 | 342 output_chunk_start = 0 |
343 stdout = '' | 343 stdout = '' |
344 exit_code = None | 344 exit_code = None |
345 had_hard_timeout = False | 345 had_hard_timeout = False |
346 had_io_timeout = False | 346 had_io_timeout = False |
347 must_signal_internal_failure = None | 347 must_signal_internal_failure = None |
| 348 kill_sent = False |
348 timed_out = None | 349 timed_out = None |
349 try: | 350 try: |
350 calc = lambda: calc_yield_wait( | 351 calc = lambda: calc_yield_wait( |
351 task_details, start, last_io, timed_out, stdout) | 352 task_details, start, last_io, timed_out, stdout) |
352 maxsize = lambda: MAX_CHUNK_SIZE - len(stdout) | 353 maxsize = lambda: MAX_CHUNK_SIZE - len(stdout) |
353 last_io = monotonic_time() | 354 last_io = monotonic_time() |
354 for _, new_data in proc.yield_any(maxsize=maxsize, soft_timeout=calc): | 355 for _, new_data in proc.yield_any(maxsize=maxsize, soft_timeout=calc): |
355 now = monotonic_time() | 356 now = monotonic_time() |
356 if new_data: | 357 if new_data: |
357 stdout += new_data | 358 stdout += new_data |
358 last_io = now | 359 last_io = now |
359 | 360 |
360 # Post update if necessary. | 361 # Post update if necessary. |
361 if should_post_update(stdout, now, last_packet): | 362 if should_post_update(stdout, now, last_packet): |
362 last_packet = monotonic_time() | 363 last_packet = monotonic_time() |
363 params['cost_usd'] = ( | 364 params['cost_usd'] = ( |
364 cost_usd_hour * (last_packet - task_start) / 60. / 60.) | 365 cost_usd_hour * (last_packet - task_start) / 60. / 60.) |
365 post_update(swarming_server, params, None, stdout, output_chunk_start) | 366 post_update(swarming_server, params, None, stdout, output_chunk_start) |
366 output_chunk_start += len(stdout) | 367 output_chunk_start += len(stdout) |
367 stdout = '' | 368 stdout = '' |
368 | 369 |
369 # Send signal on timeout if necessary. Both are failures, not | 370 # Send signal on timeout if necessary. Both are failures, not |
370 # internal_failures. | 371 # internal_failures. |
371 # Eventually kill but return 0 so bot_main.py doesn't cancel the task. | 372 # Eventually kill but return 0 so bot_main.py doesn't cancel the task. |
372 if not timed_out: | 373 if not timed_out: |
373 if now - last_io > task_details.io_timeout: | 374 if now - last_io > task_details.io_timeout: |
374 had_io_timeout = True | 375 had_io_timeout = True |
375 logging.warning('I/O timeout') | 376 logging.warning('I/O timeout; sending SIGTERM') |
376 try: | 377 proc.terminate() |
377 proc.terminate() | |
378 except OSError: | |
379 pass | |
380 timed_out = monotonic_time() | 378 timed_out = monotonic_time() |
381 elif now - start > task_details.hard_timeout: | 379 elif now - start > task_details.hard_timeout: |
382 had_hard_timeout = True | 380 had_hard_timeout = True |
383 logging.warning('Hard timeout') | 381 logging.warning('Hard timeout; sending SIGTERM') |
384 try: | 382 proc.terminate() |
385 proc.terminate() | |
386 except OSError: | |
387 pass | |
388 timed_out = monotonic_time() | 383 timed_out = monotonic_time() |
389 else: | 384 else: |
390 # During grace period. | 385 # During grace period. |
391 if now >= timed_out + task_details.grace_period: | 386 if not kill_sent and now >= timed_out + task_details.grace_period: |
392 # Now kill for real. The user can distinguish between the following | 387 # Now kill for real. The user can distinguish between the following |
393 # states: | 388 # states: |
394 # - signal but process exited within grace period, | 389 # - signal but process exited within grace period, |
395 # (hard_|io_)_timed_out will be set but the process exit code will | 390 # (hard_|io_)_timed_out will be set but the process exit code will |
396 # be script provided. | 391 # be script provided. |
397 # - process exited late, exit code will be -9 on posix. | 392 # - process exited late, exit code will be -9 on posix. |
398 try: | 393 logging.warning('Grace exhausted; sending SIGKILL') |
399 logging.warning('proc.kill() after grace') | 394 proc.kill() |
400 proc.kill() | 395 kill_sent = True |
401 except OSError: | |
402 pass | |
403 logging.info('Waiting for proces exit') | 396 logging.info('Waiting for proces exit') |
404 exit_code = proc.wait() | 397 exit_code = proc.wait() |
405 except MustExit as e: | 398 except MustExit as e: |
406 # TODO(maruel): Send SIGTERM to the child process and give it | 399 # TODO(maruel): Send SIGTERM to the child process and give it |
407 # task_details.grace_period to terminate. | 400 # task_details.grace_period to terminate. |
408 must_signal_internal_failure = ( | 401 must_signal_internal_failure = ( |
409 u'task_runner received signal %s' % e.signal) | 402 u'task_runner received signal %s' % e.signal) |
410 exit_code = kill_and_wait(proc, 'signal %d' % e.signal) | 403 exit_code = kill_and_wait(proc, 'signal %d' % e.signal) |
411 except (IOError, OSError): | 404 except (IOError, OSError): |
412 # Something wrong happened, try to kill the child process. | 405 # Something wrong happened, try to kill the child process. |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
484 if options.start > now: | 477 if options.start > now: |
485 options.start = now | 478 options.start = now |
486 | 479 |
487 try: | 480 try: |
488 load_and_run( | 481 load_and_run( |
489 options.in_file, remote, options.cost_usd_hour, options.start, | 482 options.in_file, remote, options.cost_usd_hour, options.start, |
490 options.out_file) | 483 options.out_file) |
491 return 0 | 484 return 0 |
492 finally: | 485 finally: |
493 logging.info('quitting') | 486 logging.info('quitting') |
OLD | NEW |