| OLD | NEW |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from datetime import datetime | 5 from datetime import datetime |
| 6 | 6 |
| 7 from twisted.python import log | 7 from twisted.python import log |
| 8 from twisted.internet import reactor | 8 from twisted.internet import reactor |
| 9 | 9 |
| 10 |
| 11 class FloatingSet(object): |
| 12 """A set describing available primary/floating slaves.""" |
| 13 def __init__(self): |
| 14 self._primary = set() |
| 15 self._floating = set() |
| 16 |
| 17 def AddPrimary(self, *s): |
| 18 self._primary.update(s) |
| 19 |
| 20 def AddFloating(self, *s): |
| 21 self._floating.update(s) |
| 22 |
| 23 def NextSlaveFunc(self, grace_period): |
| 24 """Returns a NextSlaveFunc that uses the contents of this set.""" |
| 25 return _FloatingNextSlaveFunc(self, grace_period) |
| 26 |
| 27 def Get(self): |
| 28 return (sorted(self._primary), sorted(self._floating)) |
| 29 |
| 30 def __str__(self): |
| 31 return '%s > %s' % ( |
| 32 ', '.join(sorted(self._primary)), |
| 33 ', '.join(sorted(self._floating))) |
| 34 |
| 35 |
| 10 class PokeBuilderTimer(object): | 36 class PokeBuilderTimer(object): |
| 11 def __init__(self, botmaster, buildername): | 37 def __init__(self, botmaster, buildername): |
| 12 self.botmaster = botmaster | 38 self.botmaster = botmaster |
| 13 self.buildername = buildername | 39 self.buildername = buildername |
| 14 self.delayed_call = None | 40 self.delayed_call = None |
| 15 | 41 |
| 16 def cancel(self): | 42 def cancel(self): |
| 17 if self.delayed_call is not None: | 43 if self.delayed_call is not None: |
| 18 self.delayed_call.cancel() | 44 self.delayed_call.cancel() |
| 19 self.delayed_call = None | 45 self.delayed_call = None |
| 20 | 46 |
| 21 def reset(self, delta): | 47 def reset(self, delta): |
| 22 if self.delayed_call is not None: | 48 if self.delayed_call is not None: |
| 23 current_delta = (datetime.fromtimestamp(self.delayed_call.getTime()) - | 49 current_delta = (datetime.fromtimestamp(self.delayed_call.getTime()) - |
| 24 datetime.datetime.now()) | 50 _get_now()) |
| 25 if delta < current_delta: | 51 if delta < current_delta: |
| 26 self.delayed_call.reset(delta.total_seconds()) | 52 self.delayed_call.reset(delta.total_seconds()) |
| 27 return | 53 return |
| 28 | 54 |
| 29 # Schedule a new call | 55 # Schedule a new call |
| 30 self.delayed_call = reactor.callLater( | 56 self.delayed_call = reactor.callLater( |
| 31 delta.total_seconds(), | 57 delta.total_seconds(), |
| 32 self._poke, | 58 self._poke, |
| 33 ) | 59 ) |
| 34 | 60 |
| 35 def _poke(self): | 61 def _poke(self): |
| 36 self.delayed_call = None | 62 self.delayed_call = None |
| 37 log.msg("Poking builds for builder %r" % (self.buildername,)) | 63 log.msg('Poking builds for builder [%s]' % (self.buildername,)) |
| 38 self.botmaster.maybeStartBuildsForBuilder(self.buildername) | 64 self.botmaster.maybeStartBuildsForBuilder(self.buildername) |
| 39 | 65 |
| 40 | 66 |
| 41 class FloatingNextSlaveFunc(object): | 67 class _FloatingNextSlaveFunc(object): |
| 42 """ | 68 """ |
| 43 This object, when used as a Builder's 'nextSlave' function, allows a strata- | 69 This object, when used as a Builder's 'nextSlave' function, allows a strata- |
| 44 based preferential treatment to be assigned to a Builder's Slaves. | 70 based preferential treatment to be assigned to a Builder's Slaves. |
| 45 | 71 |
| 46 The 'nextSlave' function is called on a scheduled build when an associated | 72 The 'nextSlave' function is called on a scheduled build when an associated |
| 47 slave becomes available, either coming online or finishing an existing build. | 73 slave becomes available, either coming online or finishing an existing build. |
| 48 These events are used as stimulus to enable the primary builder(s) to pick | 74 These events are used as stimulus to enable the primary builder(s) to pick |
| 49 up builds when appropriate. | 75 up builds when appropriate. |
| 50 | 76 |
| 51 1) If a Primary is available, the build will be assigned to them. | 77 1) If a Primary is available, the build will be assigned to them. |
| 52 2) If a Primary builder is busy or is still within its grace period for | 78 2) If a Primary builder is busy or is still within its grace period for |
| 53 unavailability, no slave will be assigned in anticipation of the | 79 unavailability, no slave will be assigned in anticipation of the |
| 54 'nextSlave' being re-invoked once the builder returns (1). If the grace | 80 'nextSlave' being re-invoked once the builder returns (1). If the grace |
| 55 period expires, we "poke" the master to call 'nextSlave', at which point | 81 period expires, we "poke" the master to call 'nextSlave', at which point |
| 56 the build will fall through to a lower strata. | 82 the build will fall through to a lower strata. |
| 57 3) If a Primary slave is offline past its grace period, the build will be | 83 3) If a Primary slave is offline past its grace period, the build will be |
| 58 assigned to a Floating slave. | 84 assigned to a Floating slave. |
| 59 | 85 |
| 60 Args: | 86 Args: |
| 61 strata_property: (str) The name of the Builder property to use to identify | 87 fs (FloatingSet): The set of available primary/floating slaves. |
| 62 its strata. | 88 grace_period: (timedelta) The amount of time that a slave can be offline |
| 63 strata: (list) A list of strata values ordered by selection priority | 89 before builds fall through to a lower strata. |
| 64 grace_period: (None/timedelta) If not None, the amount of time that a slave | |
| 65 can be offline before builds fall through to a lower strata. | |
| 66 """ | 90 """ |
| 67 | 91 |
| 68 def __init__(self, strata_property, strata, grace_period=None): | 92 def __init__(self, fs, grace_period): |
| 69 self._strata = tuple(strata) | 93 self._primary, self._floating = fs.Get() |
| 70 self._strata_property = strata_property | 94 self._fs = fs |
| 71 self._grace_period = grace_period | 95 self._grace_period = grace_period |
| 72 self._slave_strata_map = {} | |
| 73 self._slave_seen_times = {} | 96 self._slave_seen_times = {} |
| 74 self._poke_builder_timers = {} | 97 self._poke_builder_timers = {} |
| 75 self.verbose = False | 98 self.verbose = False |
| 76 | 99 |
| 77 def __repr__(self): | 100 def __repr__(self): |
| 78 return '%s(%s)' % (type(self).__name__, ' > '.join(self._strata)) | 101 return '%s(%s)' % (type(self).__name__, self._fs) |
| 79 | 102 |
| 80 def __call__(self, builder, slave_builders): | 103 def __call__(self, builder, slave_builders): |
| 81 """Main 'nextSlave' invocation point. | 104 """Main 'nextSlave' invocation point. |
| 82 | 105 |
| 83 When this is called, we are given the following information: | 106 When this is called, we are given the following information: |
| 84 - The Builder | 107 - The Builder |
| 85 - A set of 'SlaveBuilder' instances that are available and ready for | 108 - A set of 'SlaveBuilder' instances that are available and ready for |
| 86 assignment (slave_builders). | 109 assignment (slave_builders). |
| 87 - The total set of ONLINE 'SlaveBuilder' instances associated with | 110 - The total set of ONLINE 'SlaveBuilder' instances associated with |
| 88 'builder' (builder.slaves) | 111 'builder' (builder.slaves) |
| 89 - The set of all slaves configured for Builder (via | 112 - The set of all slaves configured for Builder (via |
| 90 '_get_all_slave_status') | 113 '_get_all_slave_status') |
| 91 | 114 |
| 92 We compile that into a stateful awareness and use it as a decision point. | 115 We compile that into a stateful awareness and use it as a decision point. |
| 93 Based on the slave availability and grace period, we will either: | 116 Based on the slave availability and grace period, we will either: |
| 94 (1) Return a slave immediately to claim this build | 117 (1) Return a slave immediately to claim this build. We do this if: |
| 95 (2) Return 'None' (delaying the build) in anticipation of a higher-strata | 118 (1a) There was a "primary" build slave available, or |
| 96 slave becoming available. | 119 (1b) We are outside of all of the grace periods for the primary slaves, |
| 120 and there is a floating builder available. |
| 121 (2) Return 'None' (delaying the build) in anticipation of primary/floating |
| 122 availability. |
| 97 | 123 |
| 98 If we go with (2), we will schedule a 'poke' timer to stimulate a future | 124 If we go with (2), we will schedule a 'poke' timer to stimulate a future |
| 99 'nextSlave' call if the only higher-strata slave candidates are currently | 125 'nextSlave' call, since BuildBot only checks for builds on explicit slave |
| 100 offline. We do this because they could be permanently offline, so there's | 126 availability edges. This covers the case where floating builders are |
| 101 no guarantee that a 'nextSlave' will be naturally called in any time frame. | 127 available, but aren't enlisted because we're within the grace period. In |
| 128 this case, we need to re-evaluate slaves after the grace period expires, |
| | 129 but actual slave state won't have changed, so no new slave availability edge |
| 130 will have occurred. |
| 102 """ | 131 """ |
| 103 self._debug("Calling %r with builder=[%s], slaves=[%s]", | 132 self._debug("Calling [%s] with builder=[%s], slaves=[%s]", |
| 104 self, builder, slave_builders) | 133 self, builder, slave_builders) |
| 105 self._cancel_builder_timer(builder) | 134 self._cancel_builder_timer(builder) |
| 106 | 135 |
| 107 # Get the set of all 'SlaveStatus' assigned to this Builder (idle, busy, | 136 # Get the set of all 'SlaveStatus' assigned to this Builder (idle, busy, |
| 108 # and offline). | 137 # and offline). |
| 109 slave_status_map = dict( | 138 slave_status_map = dict( |
| 110 (slave_status.name, slave_status) | 139 (slave_status.name, slave_status) |
| 111 for slave_status in self._get_all_slave_status(builder) | 140 for slave_status in self._get_all_slave_status(builder) |
| 112 ) | 141 ) |
| 113 | 142 |
| 114 # Index proposed 'nextSlave' slaves by name | 143 # Record the names of the slaves that were proposed. |
| 115 proposed_slave_builder_map = {} | 144 proposed_slave_builder_map = {} |
| 116 for slave_builder in slave_builders: | 145 for slave_builder in slave_builders: |
| 117 proposed_slave_builder_map[slave_builder.slave.slavename] = slave_builder | 146 proposed_slave_builder_map[slave_builder.slave.slavename] = slave_builder |
| 118 | 147 |
| 119 # Calculate the oldest a slave can be before we assume something's wrong. | 148 # Calculate the oldest a slave can be before we assume something's wrong. |
| 120 grace_threshold = now = None | 149 now = _get_now() |
| 121 if self._grace_period is not None: | 150 grace_threshold = (now - self._grace_period) |
| 122 now = datetime.now() | |
| 123 grace_threshold = (now - self._grace_period) | |
| 124 | 151 |
| 125 # Index all builder slaves (even busy ones) by name. Also, record this | 152 # Record the last time we've seen any of these slaves online. |
| 126 # slave's strata so we can reference it even if the slave goes offline | |
| 127 # in the future. | |
| 128 online_slave_builders = set() | 153 online_slave_builders = set() |
| 129 for slave_builder in builder.slaves: | 154 for slave_builder in builder.slaves: |
| 130 build_slave = slave_builder.slave | 155 build_slave = slave_builder.slave |
| 131 if build_slave is None: | 156 if build_slave is None: |
| 132 continue | 157 continue |
| 133 self._record_strata(build_slave) | 158 self._record_slave_seen_time(build_slave, now) |
| 134 if now is not None: | |
| 135 self._record_slave_seen_time(build_slave, now) | |
| 136 online_slave_builders.add(build_slave.slavename) | 159 online_slave_builders.add(build_slave.slavename) |
| 137 | 160 |
| 138 # Check the strata, in order. | 161 self._debug('Online proposed slaves: [%s]', |
| 139 for stratum in self._strata: | 162 slave_builders) |
| 140 busy_slaves = [] | |
| 141 offline_slaves = [] | |
| 142 wait_delta = None | |
| 143 | 163 |
| 144 for slave_name in self._slave_strata_map.get(stratum, ()): | 164 # Are there any primary slaves that are proposed? If so, use it |
| 145 self._debug("Considering slave %r for stratum %r", slave_name, stratum) | 165 within_grace_period = [] |
| 166 some_primary_were_busy = False |
| 167 wait_delta = None |
| 168 for slave_name in self._primary: |
| 169 self._debug('Considering primary slave [%s]', slave_name) |
| 146 | 170 |
| 147 # Get the 'SlaveStatus' object for this slave | 171 # Was this slave proposed to 'nextSlave'? |
| 148 slave_status = slave_status_map.get(slave_name) | 172 slave_builder = proposed_slave_builder_map.get(slave_name) |
| 149 if slave_status is None: | 173 if slave_builder is not None: |
| 150 continue | 174 # Yes. Use it! |
| 175 self._debug('Slave [%s] is available', slave_name) |
| 176 return slave_builder |
| 151 | 177 |
| 152 # Was this slave proposed by 'nextSlave'? | 178 # Is this slave online? If so, we won't consider floating candidates. |
| 179 if slave_name in online_slave_builders: |
| 180 # The slave is online, but is not proposed (BUSY); add it to the |
| 181 # desired slaves list. |
| 182 self._debug('Slave [%s] is online but BUSY.', slave_name) |
| 183 within_grace_period.append(slave_name) |
| 184 some_primary_were_busy = True |
| 185 continue |
| 186 |
| 187 # Get the 'SlaveStatus' object for this slave |
| 188 slave_status = slave_status_map.get(slave_name) |
| 189 if slave_status is None: |
| 190 continue |
| 191 |
| 192 # The slave is offline. Is this slave within the grace period? |
| 193 last_seen = self._get_latest_seen_time(slave_status) |
| 194 if last_seen < grace_threshold: |
| 195 # No, the slave is older than our grace period. |
| 196 self._debug('Slave [%s] is OFFLINE and outside grace period ' |
| 197 '(%s < %s).', slave_name, last_seen, grace_threshold) |
| 198 continue |
| 199 |
| 200 # This slave is within its grace threshold. Add it to the list of |
| 201 # desired slaves from this set and update our wait delta in case we |
| 202 # have to poke. |
| 203 # |
| 204 # We track the longest grace period delta, since after this point if |
| 205 # no slaves have taken the build we would otherwise hang. |
| 206 self._debug('Slave %r is OFFLINE but within grace period ' |
| 207 '(%s >= %s).', slave_name, last_seen, grace_threshold) |
| 208 within_grace_period.append(slave_name) |
| 209 slave_wait_delta = (self._grace_period - (now - last_seen)) |
| 210 if (wait_delta is None) or (slave_wait_delta > wait_delta): |
| 211 wait_delta = slave_wait_delta |
| 212 |
| 213 # We've looped through all primary slaves, and none of them were available. |
| 214 # Were some within the grace period? |
| 215 if not within_grace_period: |
| 216 # We're outside of our grace period. Are there floating slaves that we |
| 217 # can use? |
| 218 for slave_name in self._floating: |
| 153 slave_builder = proposed_slave_builder_map.get(slave_name) | 219 slave_builder = proposed_slave_builder_map.get(slave_name) |
| 154 if slave_builder is not None: | 220 if slave_builder is not None: |
| 155 # Yes. Use it! | 221 # Yes. Use it! |
| 156 self._debug("Slave %r is available", slave_name) | 222 self._debug('Slave [%s] is available', slave_name) |
| 157 return slave_builder | 223 return slave_builder |
| 158 | 224 |
| 159 # Is this slave online? | 225 self._debug('No slaves are available; returning None') |
| 160 if slave_name in online_slave_builders: | 226 return None |
| 161 # The slave is online, but is not proposed (BUSY); add it to the | |
| 162 # desired slaves list. | |
| 163 self._debug("Slave %r is online but BUSY; marking preferred", | |
| 164 slave_name) | |
| 165 busy_slaves.append(slave_name) | |
| 166 continue | |
| 167 | 227 |
| 168 # The slave is offline; do we have a grace period? | 228 # We're going to return 'None' to wait for a primary slave. If all of |
| 169 if grace_threshold is None: | 229 # the slaves that we're anticipating are offline, schedule a 'poke' |
| 170 # No grace period, so this slave is not a candidate | 230 # after the last candidate has exceeded its grace period to allow the |
| 171 self._debug("Slave %r is OFFLINE with no grace period; ignoring", | 231 # build to go to lower strata. |
| 172 slave_name) | 232 log.msg('Returning None in anticipation of unavailable primary slaves. ' |
| 173 continue | 233 'Please disregard the following BuildBot `nextSlave` ' |
| 234 'error: %s' % (within_grace_period,)) |
| 174 | 235 |
| 175 # Yes; is this slave within the grace period? | 236 if (not some_primary_were_busy) and (wait_delta is not None): |
| 176 last_seen = self._get_latest_seen_time(slave_status) | 237 self._debug('Scheduling ping for [%s] in [%s]', |
| 177 if last_seen < grace_threshold: | 238 builder.name, wait_delta) |
| 178 # Not within grace period, so this slave is out. | 239 self._schedule_builder_timer(builder, wait_delta) |
| 179 self._debug("Slave %r is OFFLINE and outside of grace period " | |
| 180 "(%s < %s); ignoring", | |
| 181 slave_name, last_seen, grace_threshold) | |
| 182 continue | |
| 183 | |
| 184 # This slave is within its grace threshold. Add it to the list of | |
| 185 # desired stratum slaves and update our wait delta in case we have to | |
| 186 # poke. | |
| 187 # | |
| 188 # We track the longest grace period delta, since after this point if | |
| 189 # no slaves have taken the build we would otherwise hang. | |
| 190 self._debug("Slave %r is OFFLINE but within grace period " | |
| 191 "(%s >= %s); marking preferred", | |
| 192 slave_name, last_seen, grace_threshold) | |
| 193 offline_slaves.append(slave_name) | |
| 194 slave_wait_delta = (self._grace_period - (now - last_seen)) | |
| 195 if (wait_delta is None) or (slave_wait_delta > wait_delta): | |
| 196 wait_delta = slave_wait_delta | |
| 197 | |
| 198 # We've looped through our stratum and found no proposed candidates. Are | |
| 199 # there any preferred ones? | |
| 200 if busy_slaves or offline_slaves: | |
| 201 log.msg("Returning 'None' in anticipation of unavailable slaves. " | |
| 202 "Please disregard the following BuildBot 'nextSlave' " | |
| 203 "error: %s" % (busy_slaves + offline_slaves,)) | |
| 204 | |
| 205 # We're going to return 'None' to wait for a preferred slave. If all of | |
| 206 # the slaves that we're anticipating are offline, schedule a 'poke' | |
| 207 # after the last candidate has exceeded its grace period to allow the | |
| 208 # build to go to lower strata. | |
| 209 if (not busy_slaves) and (wait_delta is not None): | |
| 210 self._debug("Scheduling 'ping' for %r in %s", | |
| 211 builder.name, wait_delta) | |
| 212 self._schedule_builder_timer( | |
| 213 builder, | |
| 214 wait_delta, | |
| 215 ) | |
| 216 return None | |
| 217 | |
| 218 self._debug("No slaves are available; returning 'None'") | |
| 219 return None | 240 return None |
| 220 | 241 |
| 221 def _debug(self, fmt, *args): | 242 def _debug(self, fmt, *args): |
| 222 if not self.verbose: | 243 if not self.verbose: |
| 223 return | 244 return |
| 224 log.msg(fmt % args) | 245 log.msg(fmt % args) |
| 225 | 246 |
| 226 @staticmethod | 247 @staticmethod |
| 227 def _get_all_slave_status(builder): | 248 def _get_all_slave_status(builder): |
| 228 # Try using the builder's BuilderStatus object to get a list of all slaves | 249 # Try using the builder's BuilderStatus object to get a list of all slaves |
| (...skipping 16 matching lines...) Expand all Loading... |
| 245 | 266 |
| 246 # Add the last time we've seen the slave in our 'nextSlave' function | 267 # Add the last time we've seen the slave in our 'nextSlave' function |
| 247 last_seen_time = self._slave_seen_times.get(slave_status.name) | 268 last_seen_time = self._slave_seen_times.get(slave_status.name) |
| 248 if last_seen_time is not None: | 269 if last_seen_time is not None: |
| 249 times.append(last_seen_time) | 270 times.append(last_seen_time) |
| 250 | 271 |
| 251 if not times: | 272 if not times: |
| 252 return None | 273 return None |
| 253 return max(times) | 274 return max(times) |
| 254 | 275 |
| 255 def _record_strata(self, build_slave): | |
| 256 stratum = build_slave.properties.getProperty(self._strata_property) | |
| 257 strata_set = self._slave_strata_map.get(stratum) | |
| 258 if strata_set is None: | |
| 259 strata_set = set() | |
| 260 self._slave_strata_map[stratum] = strata_set | |
| 261 strata_set.add(build_slave.slavename) | |
| 262 | |
| 263 def _record_slave_seen_time(self, build_slave, now): | 276 def _record_slave_seen_time(self, build_slave, now): |
| 264 self._slave_seen_times[build_slave.slavename] = now | 277 self._slave_seen_times[build_slave.slavename] = now |
| 265 | 278 |
| 266 def _schedule_builder_timer(self, builder, delta): | 279 def _schedule_builder_timer(self, builder, delta): |
| 267 poke_builder_timer = self._poke_builder_timers.get(builder.name) | 280 poke_builder_timer = self._poke_builder_timers.get(builder.name) |
| 268 if poke_builder_timer is None: | 281 if poke_builder_timer is None: |
| 269 poke_builder_timer = PokeBuilderTimer( | 282 poke_builder_timer = PokeBuilderTimer( |
| 270 builder.botmaster, | 283 builder.botmaster, |
| 271 builder.name, | 284 builder.name, |
| 272 ) | 285 ) |
| 273 self._poke_builder_timers[builder.name] = poke_builder_timer | 286 self._poke_builder_timers[builder.name] = poke_builder_timer |
| 274 poke_builder_timer.reset(delta) | 287 poke_builder_timer.reset(delta) |
| 275 | 288 |
| 276 def _cancel_builder_timer(self, builder): | 289 def _cancel_builder_timer(self, builder): |
| 277 poke_builder_timer = self._poke_builder_timers.get(builder.name) | 290 poke_builder_timer = self._poke_builder_timers.get(builder.name) |
| 278 if poke_builder_timer is None: | 291 if poke_builder_timer is None: |
| 279 return | 292 return |
| 280 poke_builder_timer.cancel() | 293 poke_builder_timer.cancel() |
| 294 |
| 295 |
| 296 def _get_now(): |
| 297 """Returns (datetime.datetime): The current time. |
| 298 |
| 299 This exists so it can be overridden by mocks in unit tests. |
| 300 """ |
| 301 return datetime.datetime.now() |
| 302 |
| 303 |
| OLD | NEW |