Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: appengine/swarming/server/bot_management.py

Issue 2402543002: Create a BotEvent when a bot is leased from Machine Provider (Closed)
Patch Set: Fix tests Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2014 The LUCI Authors. All rights reserved. 1 # Copyright 2014 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Swarming bot management, e.g. list of known bots and their state. 5 """Swarming bot management, e.g. list of known bots and their state.
6 6
7 +---------+ 7 +---------+
8 |BotRoot | 8 |BotRoot |
9 |id=bot_id| 9 |id=bot_id|
10 +---------+ 10 +---------+
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 # dimensions) or via self-health check. 77 # dimensions) or via self-health check.
78 # - dimensions['id'] is not exactly one item. 78 # - dimensions['id'] is not exactly one item.
79 # - invalid HTTP POST request keys. 79 # - invalid HTTP POST request keys.
80 # - BotSettings.quarantined was set at that moment. 80 # - BotSettings.quarantined was set at that moment.
81 quarantined = ndb.BooleanProperty(default=False) 81 quarantined = ndb.BooleanProperty(default=False)
82 82
83 # Affected by event_type == 'request_task', 'task_canceled', 'task_completed', 83 # Affected by event_type == 'request_task', 'task_canceled', 'task_completed',
84 # 'task_error'. 84 # 'task_error'.
85 task_id = ndb.StringProperty(indexed=False) 85 task_id = ndb.StringProperty(indexed=False)
86 86
87 # Machine Provider lease ID, for bots acquired from Machine Provider.
88 lease_id = ndb.StringProperty(indexed=False)
89
90 # UTC datetime when bot will be reclaimed by Machine Provider.
91 lease_expiration_ts = ndb.DateTimeProperty(indexed=False)
92
87 @property 93 @property
88 def dimensions(self): 94 def dimensions(self):
89 """Returns a dict representation of self.dimensions_flat.""" 95 """Returns a dict representation of self.dimensions_flat."""
90 if self.dimensions_old: 96 if self.dimensions_old:
91 return self.dimensions_old 97 return self.dimensions_old
92 out = {} 98 out = {}
93 for i in self.dimensions_flat: 99 for i in self.dimensions_flat:
94 k, v = i.split(':', 1) 100 k, v = i.split(':', 1)
95 out.setdefault(k, []).append(v) 101 out.setdefault(k, []).append(v)
96 return out 102 return out
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 174
169 class BotEvent(_BotCommon): 175 class BotEvent(_BotCommon):
170 """This entity is immutable. 176 """This entity is immutable.
171 177
172 Parent is BotRoot. Key id is monotonically decreasing with 178 Parent is BotRoot. Key id is monotonically decreasing with
173 datastore_utils.store_new_version(). 179 datastore_utils.store_new_version().
174 180
175 This entity is created on each bot state transition. 181 This entity is created on each bot state transition.
176 """ 182 """
177 ALLOWED_EVENTS = { 183 ALLOWED_EVENTS = {
178 'bot_connected', 'bot_error', 'bot_log', 'bot_rebooting', 'bot_shutdown', 184 'bot_connected', 'bot_error', 'bot_leased', 'bot_log', 'bot_rebooting',
179 'bot_terminate', 185 'bot_shutdown', 'bot_terminate',
180 'request_restart', 'request_update', 'request_sleep', 'request_task', 186 'request_restart', 'request_update', 'request_sleep', 'request_task',
181 'task_completed', 'task_error', 'task_update', 187 'task_completed', 'task_error', 'task_update',
182 } 188 }
183 # Dimensions are used for task selection. They are encoded as a list of 189 # Dimensions are used for task selection. They are encoded as a list of
184 # key:value. Keep in mind that the same key can be used multiple times. The 190 # key:value. Keep in mind that the same key can be used multiple times. The
185 # list must be sorted. 191 # list must be sorted.
186 # It is NOT indexed because this is not needed for events. 192 # It is NOT indexed because this is not needed for events.
187 dimensions_flat = ndb.StringProperty(repeated=True, indexed=False) 193 dimensions_flat = ndb.StringProperty(repeated=True, indexed=False)
188 194
189 # Common properties for all events (which includes everything in _BotCommon). 195 # Common properties for all events (which includes everything in _BotCommon).
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
305 - dimensions: Bot's dimensions as self-reported. If not provided, keep 311 - dimensions: Bot's dimensions as self-reported. If not provided, keep
306 previous value. 312 previous value.
307 - state: ephemeral state of the bot. It is expected to change constantly. If 313 - state: ephemeral state of the bot. It is expected to change constantly. If
308 not provided, keep previous value. 314 not provided, keep previous value.
309 - version: swarming_bot.zip version as self-reported. Used to spot if a bot 315 - version: swarming_bot.zip version as self-reported. Used to spot if a bot
310 failed to update promptly. If not provided, keep previous value. 316 failed to update promptly. If not provided, keep previous value.
311 - quarantined: bool to determine if the bot was declared quarantined. 317 - quarantined: bool to determine if the bot was declared quarantined.
312 - task_id: packed task id if relevant. Set to '' to zap the stored value. 318 - task_id: packed task id if relevant. Set to '' to zap the stored value.
313 - task_name: task name if relevant. Zapped when task_id is zapped. 319 - task_name: task name if relevant. Zapped when task_id is zapped.
314 - kwargs: optional values to add to BotEvent relevant to event_type. 320 - kwargs: optional values to add to BotEvent relevant to event_type.
321 - lease_id (in kwargs): ID assigned by Machine Provider for this bot.
322 - lease_expiration_ts (in kwargs): UTC datetime when the Machine
323 Provider lease expires.
315 """ 324 """
316 if not bot_id: 325 if not bot_id:
317 return 326 return
318 327
319 # Retrieve the previous BotInfo and update it. 328 # Retrieve the previous BotInfo and update it.
320 info_key = get_info_key(bot_id) 329 info_key = get_info_key(bot_id)
321 bot_info = info_key.get() or BotInfo(key=info_key) 330 bot_info = info_key.get() or BotInfo(key=info_key)
322 bot_info.last_seen_ts = utils.utcnow() 331 bot_info.last_seen_ts = utils.utcnow()
323 bot_info.external_ip = external_ip 332 bot_info.external_ip = external_ip
324 bot_info.authenticated_as = authenticated_as 333 bot_info.authenticated_as = authenticated_as
325 if dimensions: 334 if dimensions:
326 bot_info.dimensions_flat = dimensions_to_flat(dimensions) 335 bot_info.dimensions_flat = dimensions_to_flat(dimensions)
327 if state: 336 if state:
328 bot_info.state = state 337 bot_info.state = state
329 if quarantined is not None: 338 if quarantined is not None:
330 bot_info.quarantined = quarantined 339 bot_info.quarantined = quarantined
331 if task_id is not None: 340 if task_id is not None:
332 bot_info.task_id = task_id 341 bot_info.task_id = task_id
333 if task_name: 342 if task_name:
334 bot_info.task_name = task_name 343 bot_info.task_name = task_name
335 if version is not None: 344 if version is not None:
336 bot_info.version = version 345 bot_info.version = version
346 if kwargs.get('lease_id') is not None:
347 bot_info.lease_id = kwargs['lease_id']
348 if kwargs.get('lease_expiration_ts') is not None:
349 bot_info.lease_expiration_ts = kwargs['lease_expiration_ts']
337 350
338 if event_type in ('request_sleep', 'task_update'): 351 if event_type in ('request_sleep', 'task_update'):
339 # Handle this specifically. It's not much of an event worth saving a BotEvent 352 # Handle this specifically. It's not much of an event worth saving a BotEvent
340 # for but it's worth updating BotInfo. The only reason BotInfo is GET is to 353 # for but it's worth updating BotInfo. The only reason BotInfo is GET is to
341 # keep first_seen_ts. It's not necessary to use a transaction here since no 354 # keep first_seen_ts. It's not necessary to use a transaction here since no
342 # BotEvent is being added, only last_seen_ts is really updated. 355 # BotEvent is being added, only last_seen_ts is really updated.
343 bot_info.put() 356 bot_info.put()
344 return 357 return
345 358
346 event = BotEvent( 359 event = BotEvent(
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
393 Returns: 406 Returns:
394 Tuple (True to restart, text message explaining the reason). 407 Tuple (True to restart, text message explaining the reason).
395 """ 408 """
396 # Periodically reboot bots to work around OS-level leaks (especially on Win). 409 # Periodically reboot bots to work around OS-level leaks (especially on Win).
397 running_time = state.get('running_time', 0) 410 running_time = state.get('running_time', 0)
398 assert isinstance(running_time, (int, float)) 411 assert isinstance(running_time, (int, float))
399 period = get_bot_reboot_period(bot_id, state) 412 period = get_bot_reboot_period(bot_id, state)
400 if period and running_time > period: 413 if period and running_time > period:
401 return True, 'Periodic reboot: running longer than %ds' % period 414 return True, 'Periodic reboot: running longer than %ds' % period
402 return False, '' 415 return False, ''
OLDNEW
« no previous file with comments | « appengine/swarming/handlers_bot_test.py ('k') | appengine/swarming/server/bot_management_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698