Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2012 The LUCI Authors. All rights reserved. | 2 # Copyright 2012 The LUCI Authors. All rights reserved. |
| 3 # Use of this source code is governed under the Apache License, Version 2.0 | 3 # Use of this source code is governed under the Apache License, Version 2.0 |
| 4 # that can be found in the LICENSE file. | 4 # that can be found in the LICENSE file. |
| 5 | 5 |
| 6 """Runs a command with optional isolated input/output. | 6 """Runs a command with optional isolated input/output. |
| 7 | 7 |
| 8 Despite name "run_isolated", can run a generic non-isolated command specified as | 8 Despite name "run_isolated", can run a generic non-isolated command specified as |
| 9 args. | 9 args. |
| 10 | 10 |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 98 # It is recommended to start the script with a `root_dir` as short as | 98 # It is recommended to start the script with a `root_dir` as short as |
| 99 # possible. | 99 # possible. |
| 100 # - ir stands for isolated_run | 100 # - ir stands for isolated_run |
| 101 # - io stands for isolated_out | 101 # - io stands for isolated_out |
| 102 # - it stands for isolated_tmp | 102 # - it stands for isolated_tmp |
| 103 ISOLATED_RUN_DIR = u'ir' | 103 ISOLATED_RUN_DIR = u'ir' |
| 104 ISOLATED_OUT_DIR = u'io' | 104 ISOLATED_OUT_DIR = u'io' |
| 105 ISOLATED_TMP_DIR = u'it' | 105 ISOLATED_TMP_DIR = u'it' |
| 106 | 106 |
| 107 | 107 |
| 108 OUTLIVING_ZOMBIE_MSG = """\ | |
| 109 *** Swarming tried multiple times to delete the %s directory and failed *** | |
| 110 *** Hard failing the task *** | |
| 111 | |
| 112 Swarming detected that your testing script ran an executable, which may have | |
| 113 started a child executable, and the main script returned early, leaving the | |
| 114 children executables playing around unguided. | |
| 115 | |
| 116 You don't want to leave children processes outliving the task on the Swarming | |
| 117 bot, do you? The Swarming bot doesn't. | |
| 118 | |
| 119 How to fix? | |
| 120 - For any process that starts children processes, make sure all children | |
| 121 processes terminated properly before each parent process exits. This is | |
| 122 especially important in very deep process trees. | |
| 123 - This must be done properly both in normal successful task and in case of | |
| 124 task failure. Cleanup is very important. | |
| 125 - The Swarming bot sends a SIGTERM in case of timeout. | |
| 126 - You have %s seconds to comply after the signal was sent to the process | |
| 127 before the process is forcibly killed. | |
| 128 - To achieve not leaking children processes in case of signals on timeout, you | |
| 129 MUST handle signals in each executable / python script and propagate them to | |
| 130 children processes. | |
| | 131 - When your test script (python or binary) receives a signal like SIGTERM (or |
| 132 CTRL_BREAK_EVENT on Windows), send it to all children processes and wait for | |
| 133 them to terminate before quitting. | |
| 134 | |
| 135 See | |
| | 136 https://github.com/luci/luci-py/blob/master/appengine/swarming/doc/Bot.md#graceful-termination-aka-the-sigterm-and-sigkill-dance |
| 137 for more information. | |
| 138 | |
| 139 *** May the SIGKILL force be with you *** | |
| 140 """ | |
| 141 | |
| 142 | |
| 108 def get_as_zip_package(executable=True): | 143 def get_as_zip_package(executable=True): |
| 109 """Returns ZipPackage with this module and all its dependencies. | 144 """Returns ZipPackage with this module and all its dependencies. |
| 110 | 145 |
| 111 If |executable| is True will store run_isolated.py as __main__.py so that | 146 If |executable| is True will store run_isolated.py as __main__.py so that |
| 112 zip package is directly executable by python. | 147 zip package is directly executable by python. |
| 113 """ | 148 """ |
| 114 # Building a zip package when running from another zip package is | 149 # Building a zip package when running from another zip package is |
| 115 # unsupported and probably unneeded. | 150 # unsupported and probably unneeded. |
| 116 assert not zip_package.is_zipped_module(sys.modules[__name__]) | 151 assert not zip_package.is_zipped_module(sys.modules[__name__]) |
| 117 assert THIS_FILE_PATH | 152 assert THIS_FILE_PATH |
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 540 # process locks *.exe file). Examine out_dir only after that call | 575 # process locks *.exe file). Examine out_dir only after that call |
| 541 # completes (since child processes may write to out_dir too and we need | 576 # completes (since child processes may write to out_dir too and we need |
| 542 # to wait for them to finish). | 577 # to wait for them to finish). |
| 543 if fs.isdir(run_dir): | 578 if fs.isdir(run_dir): |
| 544 try: | 579 try: |
| 545 success = file_path.rmtree(run_dir) | 580 success = file_path.rmtree(run_dir) |
| 546 except OSError as e: | 581 except OSError as e: |
| 547 logging.error('Failure with %s', e) | 582 logging.error('Failure with %s', e) |
| 548 success = False | 583 success = False |
| 549 if not success: | 584 if not success: |
| 550 print >> sys.stderr, ( | 585 sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('run', grace_period)) |
| 551 'Failed to delete the run directory, thus failing the task.\n' | |
| 552 'This may be due to a subprocess outliving the main task\n' | |
| 553 'process, holding on to resources. Please fix the task so\n' | |
| 554 'that it releases resources and cleans up subprocesses.') | |
| 555 if result['exit_code'] == 0: | 586 if result['exit_code'] == 0: |
| 556 result['exit_code'] = 1 | 587 result['exit_code'] = 1 |
| 557 if fs.isdir(tmp_dir): | 588 if fs.isdir(tmp_dir): |
| 558 try: | 589 try: |
| 559 success = file_path.rmtree(tmp_dir) | 590 success = file_path.rmtree(tmp_dir) |
| 560 except OSError as e: | 591 except OSError as e: |
| 561 logging.error('Failure with %s', e) | 592 logging.error('Failure with %s', e) |
| 562 success = False | 593 success = False |
| 563 if not success: | 594 if not success: |
| 564 print >> sys.stderr, ( | 595 sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('run', grace_period)) |
|
Taylor_Brandstetter
2017/06/08 17:58:46
Should be "temp"?
M-A Ruel
2017/06/08 18:14:07
Argh, I got caught copy-pasting. Fixing.
| |
| 565 'Failed to delete the temp directory, thus failing the task.\n' | |
| 566 'This may be due to a subprocess outliving the main task\n' | |
| 567 'process, holding on to resources. Please fix the task so\n' | |
| 568 'that it releases resources and cleans up subprocesses.') | |
| 569 if result['exit_code'] == 0: | 596 if result['exit_code'] == 0: |
| 570 result['exit_code'] = 1 | 597 result['exit_code'] = 1 |
| 571 | 598 |
| 572 # This deletes out_dir if leak_temp_dir is not set. | 599 # This deletes out_dir if leak_temp_dir is not set. |
| 573 if out_dir: | 600 if out_dir: |
| 574 isolated_stats = result['stats'].setdefault('isolated', {}) | 601 isolated_stats = result['stats'].setdefault('isolated', {}) |
| 575 result['outputs_ref'], success, isolated_stats['upload'] = ( | 602 result['outputs_ref'], success, isolated_stats['upload'] = ( |
| 576 delete_and_upload(storage, out_dir, leak_temp_dir)) | 603 delete_and_upload(storage, out_dir, leak_temp_dir)) |
| 577 if not success and result['exit_code'] == 0: | 604 if not success and result['exit_code'] == 0: |
| 578 result['exit_code'] = 1 | 605 result['exit_code'] = 1 |
| (...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1068 return 1 | 1095 return 1 |
| 1069 | 1096 |
| 1070 | 1097 |
| 1071 if __name__ == '__main__': | 1098 if __name__ == '__main__': |
| 1072 subprocess42.inhibit_os_error_reporting() | 1099 subprocess42.inhibit_os_error_reporting() |
| 1073 # Ensure that we are always running with the correct encoding. | 1100 # Ensure that we are always running with the correct encoding. |
| 1074 fix_encoding.fix_encoding() | 1101 fix_encoding.fix_encoding() |
| 1075 file_path.enable_symlink() | 1102 file_path.enable_symlink() |
| 1076 | 1103 |
| 1077 sys.exit(main(sys.argv[1:])) | 1104 sys.exit(main(sys.argv[1:])) |
| OLD | NEW |