Chromium Code Reviews| Index: appengine/swarming/server/task_scheduler.py |
| diff --git a/appengine/swarming/server/task_scheduler.py b/appengine/swarming/server/task_scheduler.py |
| index 4911343ec322f03fbd201d897e8ef999dbb93d7e..5aa9035b7ce4facaf7f0e3732209b71151b7e0b3 100644 |
| --- a/appengine/swarming/server/task_scheduler.py |
| +++ b/appengine/swarming/server/task_scheduler.py |
| @@ -147,6 +147,9 @@ def _reap_task(bot_dimensions, bot_version, to_run_key, request): |
| run_result = task_result.new_run_result( |
| request, (result_summary.try_number or 0) + 1, bot_id, bot_version, |
| bot_dimensions) |
| + # Upon bot reap, both .started_ts and .modified_ts match. They differ on |
| + # the first ping. |
| + run_result.started_ts = now |
| run_result.modified_ts = now |
| result_summary.set_from_run_result(run_result, request) |
| ndb.put_multi([to_run, run_result, result_summary]) |
| @@ -199,6 +202,7 @@ def _handle_dead_bot(run_result_key): |
| return None, run_result.bot_id |
| run_result.signal_server_version(server_version) |
| + old_modified = run_result.modified_ts |
| run_result.modified_ts = now |
| orig_summary_state = result_summary.state |
| @@ -211,8 +215,14 @@ def _handle_dead_bot(run_result_key): |
| run_result.abandoned_ts = now |
| task_is_retried = None |
| elif (result_summary.try_number == 1 and now < request.expiration_ts and |
| - request.properties.idempotent): |
| - # Retry it. |
| + (request.properties.idempotent or |
| + run_result.started_ts == old_modified)): |
| + # Retry it. It fits: |
| + # - first try |
| + # - not yet expired |
| + # - One of: |
| + # - idempotent |
| + # - task hadn't got any ping at all from task_runner.run_command() |
|
Vadim Sh.
2017/06/08 22:06:01
What happens if the ping from first bot then comes
M-A Ruel
2017/06/12 17:16:21
Added test_bot_poll_http_500_but_bot_reapears_afte
|
| to_put = (run_result, result_summary, to_run) |
| to_run.queue_number = task_to_run.gen_queue_number(request) |
| run_result.state = task_result.State.BOT_DIED |