Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2344)

Unified Diff: appengine/swarming/server/task_scheduler.py

Issue 2298053002: Add more safety checks. (Closed)
Patch Set: More check Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/swarming/server/task_result.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/swarming/server/task_scheduler.py
diff --git a/appengine/swarming/server/task_scheduler.py b/appengine/swarming/server/task_scheduler.py
index f0394b50167ee6a96e04e52c16e75a773182d78a..a1478f08a98f1a5f751f4369a1e60f7e66ecce90 100644
--- a/appengine/swarming/server/task_scheduler.py
+++ b/appengine/swarming/server/task_scheduler.py
@@ -474,39 +474,50 @@ def schedule_request(request):
# TODO(maruel): Make a reverse map on successful task completion so this
# becomes a simple ndb.get().
- dupe_summary = cls.query(cls.properties_hash==h).order(cls.key).get()
- if dupe_summary:
+ count = 0
+ for dupe_summary in cls.query(cls.properties_hash==h):
Vadim Sh. 2016/08/31 19:46:06 no order by key anymore?
Vadim Sh. 2016/08/31 19:46:06 nit: for i, dupe_summary in iterate(....): ...
M-A Ruel 2016/08/31 20:26:30 Done.
M-A Ruel 2016/08/31 20:26:30 Humm not sure why I had removed it, added back.
+ count += 1
+ # It is possible for the query to return stale items.
+ if (dupe_summary.state != task_result.State.COMPLETED or
+ dupe_summary.failure):
+ if count == 3:
Vadim Sh. 2016/08/31 19:46:06 Then also use smaller batch_size (it is 20 by default).
M-A Ruel 2016/08/31 20:26:30 Done.
+ # Indexes are very inconsistent, give up.
+ break
+ continue
# Refuse tasks older than X days. This is due to the isolate server
# dropping files.
# TODO(maruel): The value should be calculated from the isolate server
# setting and be unbounded when no isolated input was used.
oldest = now - datetime.timedelta(
seconds=config.settings().reusable_task_age_secs)
- if dupe_summary and dupe_summary.created_ts > oldest:
- # Setting task.queue_number to None removes it from the scheduling.
- task.queue_number = None
- _copy_summary(
- dupe_summary, result_summary,
- ('created_ts', 'modified_ts', 'name', 'user', 'tags'))
- # Zap irrelevant properties. PerformanceStats is also not copied over,
- # since it's not relevant.
- result_summary.properties_hash = None
- result_summary.try_number = 0
- result_summary.cost_saved_usd = result_summary.cost_usd
- # Only zap after.
- result_summary.costs_usd = []
- result_summary.deduped_from = task_pack.pack_run_result_key(
- dupe_summary.run_result_key)
- # In this code path, there's not much to do as the task will not be run,
- # previous results are returned. We still need to store all the entities
- # correctly.
- datastore_utils.insert(
- request, get_new_keys, extra=[task, result_summary])
- logging.debug(
- 'New request %s reusing %s', result_summary.task_id,
- dupe_summary.task_id)
- stored = True
- deduped = True
+ if dupe_summary.created_ts <= oldest:
+ # Task is too old.
+ break
+ # Setting task.queue_number to None removes it from the scheduling.
Vadim Sh. 2016/08/31 19:46:06 nit: break out of the loop here and do the rest of the work outside the loop.
M-A Ruel 2016/08/31 20:26:30 Refactored searching code into a function.
+ task.queue_number = None
+ _copy_summary(
+ dupe_summary, result_summary,
+ ('created_ts', 'modified_ts', 'name', 'user', 'tags'))
+ # Zap irrelevant properties. PerformanceStats is also not copied over,
+ # since it's not relevant.
+ result_summary.properties_hash = None
+ result_summary.try_number = 0
+ result_summary.cost_saved_usd = result_summary.cost_usd
+ # Only zap after.
+ result_summary.costs_usd = []
+ result_summary.deduped_from = task_pack.pack_run_result_key(
+ dupe_summary.run_result_key)
+ # In this code path, there's not much to do as the task will not be run,
+ # previous results are returned. We still need to store all the entities
+ # correctly.
+ datastore_utils.insert(
+ request, get_new_keys, extra=[task, result_summary])
+ logging.debug(
+ 'New request %s reusing %s', result_summary.task_id,
+ dupe_summary.task_id)
+ stored = True
+ deduped = True
+ break
if not stored:
# Storing these entities makes this task live. It is important at this point
« no previous file with comments | « appengine/swarming/server/task_result.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698