| OLD | NEW |
| (Empty) |
| 1 | |
| 2 import os.path | |
| 3 | |
| 4 import buildbot | |
| 5 | |
| 6 from twisted.spread import pb | |
| 7 from twisted.python import log | |
| 8 from twisted.internet import reactor, defer | |
| 9 from twisted.application import service, internet | |
| 10 from twisted.cred import credentials | |
| 11 | |
| 12 from buildbot.util import now | |
| 13 from buildbot.pbutil import ReconnectingPBClientFactory | |
| 14 from buildbot.slave import registry | |
| 15 # make sure the standard commands get registered. This import is performed | |
| 16 # for its side-effects. | |
| 17 from buildbot.slave import commands | |
| 18 # and make pyflakes think we aren't being stupid | |
| 19 commands = commands | |
| 20 | |
class NoCommandRunning(pb.Error):
    """pb.Error subclass named for the "no command is running" condition.

    NOTE(review): nothing in the visible part of this file raises it;
    presumably it is kept for code elsewhere -- confirm before removing.
    """
class WrongCommandRunning(pb.Error):
    """pb.Error subclass named for the "a different command is running" case.

    NOTE(review): nothing in the visible part of this file raises it;
    presumably it is kept for code elsewhere -- confirm before removing.
    """
class UnknownCommand(pb.Error):
    """Raised by SlaveBuilder.remote_startCommand when the requested command
    name has no entry in registry.commandRegistry."""
| 27 | |
class Master:
    """Simple holder for a host, a port, a username and a password.

    The four constructor arguments are stored unchanged as the attributes
    of the same names.
    """

    def __init__(self, host, port, username, password):
        # where to connect
        self.host, self.port = host, port
        # credentials to connect with
        self.username, self.password = username, password
| 34 | |
class SlaveBuild:
    """Holder for state that must survive from one step to the next within
    a single build.

    SlaveBuilder.remote_startBuild creates a fresh instance for every build,
    and all SlaveCommands have access to it.
    """

    def __init__(self, builder):
        # the SlaveBuilder that created this build
        self.builder = builder
| 42 | |
class SlaveBuilder(pb.Referenceable, service.Service):
    """Slave-side counterpart of a single Builder.

    Each instance handles one kind of build (for example an all-warnings
    build). It owns a name and a home directory; the rest of its behaviour
    is driven by the master through the remote_* methods.
    """

    # When true, a running command is interrupted both when this service is
    # stopped and when the reference to the remote step is lost.
    stopCommandOnShutdown = True

    # Reference to the Builder object on the master side. It is stored by
    # remote_setMaster and cleared by lostRemote, which is how we notice
    # that the connection to the master has been severed.
    remote = None

    # SlaveBuild instance; remote_startBuild creates a new one per build.
    build = None

    # SlaveCommand instance for the step that is currently running. It is
    # what stopCommand and remote_interruptCommand act upon.
    command = None

    # Reference to the master-side BuildStep object, stored when a step is
    # started and dropped again once the step has completed or been lost.
    remoteStep = None

    def __init__(self, name, not_really):
        """Record the builder name (as the service name) and the
        not_really flag."""
        # service.Service has no __init__ method, so nothing to chain to
        self.setName(name)
        self.not_really = not_really

    def __repr__(self):
        details = (self.name, id(self))
        return "<SlaveBuilder '%s' at %d>" % details

    def setServiceParent(self, parent):
        """Attach to the parent service and remember it as self.bot."""
        service.Service.setServiceParent(self, parent)
        # self.parent goes away when the buildmaster's config file changes
        # and this Builder is removed (possibly only to be re-added a moment
        # later because it was modified). That can happen during a build,
        # while a step is running, so keep a separate reference.
        self.bot = self.parent

    def setBuilddir(self, builddir):
        """Store builddir and make sure the matching directory below the
        bot's basedir exists, creating it when necessary."""
        assert self.parent
        self.builddir = builddir
        target = os.path.join(self.bot.basedir, self.builddir)
        self.basedir = target
        if os.path.isdir(target):
            return
        os.makedirs(target)

    def stopService(self):
        """Stop the service, halting any running command if configured to."""
        service.Service.stopService(self)
        if not self.stopCommandOnShutdown:
            return
        self.stopCommand()

    def activity(self):
        """Tell the factory reachable via parent.parent.bf that there was
        activity. Does nothing when either parent link is missing."""
        bot = self.parent
        if not bot:
            return
        buildslave = bot.parent
        if not buildslave:
            return
        buildslave.bf.activity()

    def remote_setMaster(self, remote):
        """Store the master-side reference and watch it for disconnects."""
        self.remote = remote
        self.remote.notifyOnDisconnect(self.lostRemote)

    def remote_print(self, message):
        """Log a message from the master; the message "ping" is additionally
        answered with the result of remote_ping()."""
        log.msg("SlaveBuilder.remote_print(%s): message from master: %s" %
                (self.name, message))
        if message != "ping":
            return None
        return self.remote_ping()

    def remote_ping(self):
        """Answer a ping, honouring the 'stallPings' and 'failPingOnce'
        entries of the buildslave's debugOpts dictionary."""
        log.msg("SlaveBuilder.remote_ping(%s)" % self)
        if not self.bot or not self.bot.parent:
            return None
        debugOpts = self.bot.parent.debugOpts
        if debugOpts.get("stallPings"):
            log.msg(" debug_stallPings")
            timeout, timers = debugOpts["stallPings"]
            stalled = defer.Deferred()
            # stash the DelayedCall in the caller-supplied list
            timers.append(reactor.callLater(timeout, stalled.callback, None))
            return stalled
        if debugOpts.get("failPingOnce"):
            log.msg(" debug_failPingOnce")

            class FailPingError(pb.Error):
                pass

            # remove the flag first so that only this one ping fails
            del debugOpts['failPingOnce']
            raise FailPingError("debug_failPingOnce means we should fail")
        return None

    def lostRemote(self, remote):
        """Disconnect callback for the master-side Builder reference."""
        log.msg("lost remote")
        self.remote = None

    def lostRemoteStep(self, remotestep):
        """Disconnect callback for the master-side BuildStep reference."""
        log.msg("lost remote step")
        self.remoteStep = None
        if not self.stopCommandOnShutdown:
            return
        self.stopCommand()

    # the following are Commands that can be invoked by the master-side
    # Builder
    def remote_startBuild(self):
        """Invoked before the first step of any new build is run. Creates a
        new SlaveBuild object, which holds slave-side state from one step
        to the next."""
        self.build = SlaveBuild(self)
        log.msg("%s.startBuild" % self)

    def remote_startCommand(self, stepref, stepId, command, args):
        """
        Invoked by L{buildbot.process.step.RemoteCommand.start}, as part of
        various master-side BuildSteps, to start the commands that actually
        do the build. Returns nothing; commandComplete() is called later,
        when the command's Deferred fires, to notify the master-side
        RemoteCommand.

        Raises UnknownCommand when 'command' is not in the command registry.
        """
        self.activity()

        if self.command:
            log.msg("leftover command, dropping it")
            self.stopCommand()

        try:
            factory, _version = registry.commandRegistry[command]
        except KeyError:
            raise UnknownCommand("unrecognized SlaveCommand '%s'" % command)
        self.command = factory(self, stepId, args)

        log.msg(" startCommand:%s [id %s]" % (command, stepId))
        self.remoteStep = stepref
        self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)

        def _discard(result):
            # a successful result is replaced by None, so commandComplete
            # receives either None or a Failure
            return None

        finished = self.command.doStart()
        finished.addCallback(_discard)
        finished.addBoth(self.commandComplete)
        return None

    def remote_interruptCommand(self, stepId, why):
        """Halt the current step."""
        log.msg("asked to interrupt current command: %s" % why)
        self.activity()
        if self.command:
            self.command.doInterrupt()
        else:
            # TODO: just log it, a race could result in their interrupting a
            # command that wasn't actually running
            log.msg(" .. but none was running")

    def stopCommand(self):
        """Make any currently-running command die, with no further status
        output. Used when the buildslave is shutting down or the connection
        to the master has been lost: interrupt the command, then forget
        about it."""
        running = self.command
        if not running:
            return
        log.msg("stopCommand: halting current command %s" % running)
        running.doInterrupt()
        self.command = None

    # sendUpdate is invoked by the Commands we spawn
    def sendUpdate(self, data):
        """Send a status update to the master-side
        L{buildbot.process.step.RemoteCommand} object.

        Nothing is sent while the service is not running (.running comes
        from service.Service) or when there is no remote step."""
        if not self.running or not self.remoteStep:
            return
        # The trailing 0 is the leftover 'updateNum', which the master still
        # expects to receive. Providing it avoids significant
        # interoperability issues between new slaves and old masters.
        updates = [[data, 0]]
        acked = self.remoteStep.callRemote("update", updates)
        acked.addCallback(self.ackUpdate)
        acked.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")

    def ackUpdate(self, acknum):
        # update the "last activity" timer
        self.activity()

    def ackComplete(self, dummy):
        # update the "last activity" timer
        self.activity()

    def _ackFailed(self, why, where):
        # only the location is logged; the failure itself is ignored
        log.msg("SlaveBuilder._ackFailed:", where)

    # this is fired by the Deferred attached to each Command
    def commandComplete(self, failure):
        """Forget the finished command and report completion to the master.

        'failure' is falsy on success; otherwise it is logged and wrapped
        in a pb.CopyableFailure so that it can be sent across the wire."""
        if not failure:
            log.msg("SlaveBuilder.commandComplete", self.command)
        else:
            log.msg("SlaveBuilder.commandFailed", self.command)
            log.err(failure)
            failure = pb.CopyableFailure(failure)
            failure.unsafeTracebacks = True
        self.command = None
        if not self.running:
            log.msg(" but we weren't running, quitting silently")
            return
        if not self.remoteStep:
            return
        self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
        acked = self.remoteStep.callRemote("complete", failure)
        acked.addCallback(self.ackComplete)
        acked.addErrback(self._ackFailed, "sendComplete")
        self.remoteStep = None

    def remote_shutdown(self):
        """Stop the reactor on request from the master."""
        print("slave shutting down on command from master")
        reactor.stop()
| 262 | |
| 263 | |
class Bot(pb.Referenceable, service.MultiService):
    """I represent the slave-side bot.

    I keep one SlaveBuilder child service per builder the master asked for
    (in self.builders, keyed by builder name) and answer the master's
    remote_* queries about this slave.
    """
    usePTY = None
    name = "bot"

    def __init__(self, basedir, usePTY, not_really=0):
        """Remember the base directory, the usePTY setting and the
        not_really flag; no builders exist yet."""
        service.MultiService.__init__(self)
        self.basedir = basedir
        self.usePTY = usePTY
        self.not_really = not_really
        self.builders = {}

    def startService(self):
        assert os.path.isdir(self.basedir)
        service.MultiService.startService(self)

    def _subdirectories(self):
        """Return the names of the entries of self.basedir that are
        directories."""
        # os.listdir() yields bare names, so each one has to be joined with
        # basedir before testing it; otherwise the test would be made
        # relative to the current working directory of the process.
        return [entry for entry in os.listdir(self.basedir)
                if os.path.isdir(os.path.join(self.basedir, entry))]

    def remote_getDirs(self):
        """Return the names of the directories found in self.basedir."""
        return self._subdirectories()

    def remote_getCommands(self):
        """Return a dict mapping each registered command name to its
        version."""
        versions = {}
        for name, (factory, version) in registry.commandRegistry.items():
            versions[name] = version
        return versions

    def remote_setBuilderList(self, wanted):
        """Bring self.builders in line with the master's list.

        'wanted' is a sequence of (name, builddir) pairs. Missing builders
        are created, existing ones get their builddir updated when it
        changed, and builders not mentioned are removed. Returns a dict
        mapping each wanted name to its SlaveBuilder.
        """
        retval = {}
        wanted_names = []
        wanted_dirs = ["info"]
        for (name, builddir) in wanted:
            wanted_names.append(name)
            wanted_dirs.append(builddir)
            b = self.builders.get(name, None)
            if b:
                if b.builddir != builddir:
                    log.msg("changing builddir for builder %s from %s to %s"
                            % (name, b.builddir, builddir))
                    b.setBuilddir(builddir)
            else:
                b = SlaveBuilder(name, self.not_really)
                b.usePTY = self.usePTY
                b.setServiceParent(self)
                b.setBuilddir(builddir)
                self.builders[name] = b
            retval[name] = b

        # iterate over a snapshot of the keys, since entries are deleted
        # from the dict inside the loop
        for name in list(self.builders.keys()):
            if name not in wanted_names:
                log.msg("removing old builder %s" % name)
                self.builders[name].disownServiceParent()
                del self.builders[name]

        for d in self._subdirectories():
            if d not in wanted_dirs:
                log.msg("I have a leftover directory '%s' that is not "
                        "being used by the buildmaster: you can delete "
                        "it now" % d)
        return retval

    def remote_print(self, message):
        log.msg("message from master:", message)

    def remote_getSlaveInfo(self):
        """This command retrieves data from the files in SLAVEDIR/info/* and
        sends the contents to the buildmaster. These are used to describe
        the slave and its configuration, and should be created and
        maintained by the slave administrator. They will be retrieved each
        time the master-slave connection is established.

        Returns a dict mapping file name to file contents; the dict is
        empty when the info directory does not exist.
        """
        files = {}
        infodir = os.path.join(self.basedir, "info")
        if not os.path.isdir(infodir):
            return files
        for name in os.listdir(infodir):
            filename = os.path.join(infodir, name)
            if os.path.isfile(filename):
                # close the file explicitly instead of leaving the handle
                # to be reclaimed by the garbage collector
                f = open(filename, "r")
                try:
                    files[name] = f.read()
                finally:
                    f.close()
        return files

    def remote_getVersion(self):
        """Send our version back to the Master"""
        return buildbot.version
| 345 | |
| 346 | |
| 347 | |
class BotFactory(ReconnectingPBClientFactory):
    """Reconnecting PB client factory with application-level keepalives.

    While a perspective is held, two timers may run: one sends a
    "keepalive" request to the master, the other drops the connection when
    nothing has been heard from the master for a whole keepaliveInterval.
    """

    # 'keepaliveInterval' serves two purposes. The first is to keep the
    # connection alive: it guarantees that there will be at least some
    # traffic once every 'keepaliveInterval' seconds, which may help keep an
    # interposed NAT gateway from dropping the address mapping because it
    # thinks the connection has been abandoned. The second is to put an upper
    # limit on how long the buildmaster might have gone away before we notice
    # it. For this second purpose, we insist upon seeing *some* evidence of
    # the buildmaster at least once every 'keepaliveInterval' seconds.
    #
    # None = do not use keepalives. Note that gotPerspective() replaces a
    # false value with 10 minutes when SO_KEEPALIVE cannot be enabled.
    keepaliveInterval = None

    # 'keepaliveTimeout' seconds before the interval expires, we will send a
    # keepalive request, both to add some traffic to the connection, and to
    # prompt a response from the master in case all our builders are idle. We
    # don't insist upon receiving a timely response from this message: a slow
    # link might put the request at the wrong end of a large build message.
    keepaliveTimeout = 30 # how long we will go without a response

    # 'maxDelay' determines the maximum amount of time the slave will wait
    # between connection retries
    maxDelay = 300

    keepaliveTimer = None   # DelayedCall for doKeepalive, or None
    activityTimer = None    # DelayedCall for checkActivity, or None
    lastActivity = 0        # now() value recorded by the last activity()
    unsafeTracebacks = 1
    perspective = None      # set in gotPerspective, cleared on connection loss

    def __init__(self, keepaliveInterval, keepaliveTimeout, maxDelay):
        ReconnectingPBClientFactory.__init__(self)
        self.maxDelay = maxDelay
        self.keepaliveInterval = keepaliveInterval
        self.keepaliveTimeout = keepaliveTimeout

    def startedConnecting(self, connector):
        ReconnectingPBClientFactory.startedConnecting(self, connector)
        self.connector = connector

    def gotPerspective(self, perspective):
        """Store the perspective, try to enable TCP keepalives and start the
        application-level keepalive timers when an interval is set."""
        ReconnectingPBClientFactory.gotPerspective(self, perspective)
        self.perspective = perspective
        try:
            perspective.broker.transport.setTcpKeepAlive(1)
        except Exception:
            # Best effort only. Catching Exception rather than using a bare
            # 'except:' keeps SystemExit and KeyboardInterrupt from being
            # swallowed here.
            log.msg("unable to set SO_KEEPALIVE")
            if not self.keepaliveInterval:
                # no TCP-level keepalive available: fall back to
                # application-level keepalives every ten minutes
                self.keepaliveInterval = 10*60
        self.activity()
        if self.keepaliveInterval:
            log.msg("sending application-level keepalives every %d seconds"
                    % self.keepaliveInterval)
            self.startTimers()

    def clientConnectionFailed(self, connector, reason):
        self.connector = None
        ReconnectingPBClientFactory.clientConnectionFailed(self,
                                                           connector, reason)

    def clientConnectionLost(self, connector, reason):
        self.connector = None
        self.stopTimers()
        self.perspective = None
        ReconnectingPBClientFactory.clientConnectionLost(self,
                                                         connector, reason)

    def startTimers(self):
        """Schedule the next keepalive request and the next activity check."""
        assert self.keepaliveInterval
        assert not self.keepaliveTimer
        assert not self.activityTimer
        # Insist that doKeepalive fires before checkActivity. Really, it
        # needs to happen at least one RTT beforehand.
        assert self.keepaliveInterval > self.keepaliveTimeout

        # arrange to send a keepalive a little while before our deadline
        when = self.keepaliveInterval - self.keepaliveTimeout
        self.keepaliveTimer = reactor.callLater(when, self.doKeepalive)
        # and check for activity too
        self.activityTimer = reactor.callLater(self.keepaliveInterval,
                                               self.checkActivity)

    def stopTimers(self):
        """Cancel whichever of the two timers is currently pending."""
        if self.keepaliveTimer:
            self.keepaliveTimer.cancel()
            self.keepaliveTimer = None
        if self.activityTimer:
            self.activityTimer.cancel()
            self.activityTimer = None

    def activity(self, res=None):
        """Record that the master was heard from just now. 'res' is ignored;
        it lets this method be used directly as a Deferred callback."""
        self.lastActivity = now()

    def doKeepalive(self):
        # send the keepalive request. If it fails outright, the connection
        # was already dropped, so just log and ignore.
        self.keepaliveTimer = None
        log.msg("sending app-level keepalive")
        d = self.perspective.callRemote("keepalive")
        d.addCallback(self.activity)
        d.addErrback(self.keepaliveLost)

    def keepaliveLost(self, f):
        log.msg("BotFactory.keepaliveLost")

    def checkActivity(self):
        """Drop the connection when the master has been silent for longer
        than keepaliveInterval; otherwise re-arm both timers."""
        self.activityTimer = None
        # sample the clock once so that the comparison and the logged
        # figure agree with each other
        current = now()
        if self.lastActivity + self.keepaliveInterval < current:
            log.msg("BotFactory.checkActivity: nothing from master for "
                    "%d secs" % (current - self.lastActivity))
            self.perspective.broker.transport.loseConnection()
            return
        self.startTimers()

    def stopFactory(self):
        ReconnectingPBClientFactory.stopFactory(self)
        self.stopTimers()
| 463 | |
| 464 | |
class BuildSlave(service.MultiService):
    """Top-level service: owns the Bot, the BotFactory used to log in to
    the buildmaster, and the TCP client connection to it."""

    botClass = Bot

    # debugOpts is a dictionary used during unit tests.
    #
    # debugOpts['stallPings'] can be set to a tuple of (timeout, []). Any
    # ping handled by SlaveBuilder.remote_ping (including remote_print with
    # the message "ping") will then stall for 'timeout' seconds before
    # returning. The DelayedCalls used to implement this are stashed in the
    # list so they can be cancelled later.
    #
    # debugOpts['failPingOnce'] can be set to True to make the slaveping
    # fail exactly once.

    def __init__(self, buildmaster_host, port, name, passwd, basedir,
                 keepalive, usePTY, keepaliveTimeout=30, umask=None,
                 maxdelay=300, debugOpts={}):
        """Create the bot, start the login with (name, passwd) and attach a
        TCP client for buildmaster_host:port as a child service.

        A keepalive of 0 is treated as None. debugOpts is copied, so the
        caller's dictionary (and the shared default) is never modified.
        """
        version_line = ("Creating BuildSlave -- buildbot.version: %s"
                        % buildbot.version)
        log.msg(version_line)
        service.MultiService.__init__(self)
        self.debugOpts = debugOpts.copy()

        bot = self.botClass(basedir, usePTY)
        bot.setServiceParent(self)
        self.bot = bot

        if keepalive == 0:
            keepalive = None
        self.umask = umask

        factory = BotFactory(keepalive, keepaliveTimeout, maxdelay)
        self.bf = factory
        login = credentials.UsernamePassword(name, passwd)
        factory.startLogin(login, client=bot)

        client = internet.TCPClient(buildmaster_host, port, factory)
        self.connection = client
        client.setServiceParent(self)

    def waitUntilDisconnected(self):
        """Utility method for testing. Returns a Deferred that fires when we
        lose the connection to the master; it has already fired when there
        is no perspective at the moment."""
        perspective = self.bf.perspective
        if not perspective:
            return defer.succeed(None)
        done = defer.Deferred()

        def _disconnected(res):
            done.callback(None)

        perspective.notifyOnDisconnect(_disconnected)
        return done

    def startService(self):
        """Apply the configured umask, if any, then start the children."""
        if self.umask is not None:
            os.umask(self.umask)
        service.MultiService.startService(self)

    def stopService(self):
        """Stop reconnecting, stop the child services and drop the TCP
        connection if one is still held."""
        self.bf.continueTrying = 0
        self.bf.stopTrying()
        service.MultiService.stopService(self)
        # now kill the TCP connection
        # twisted >2.0.1 does this for us, and leaves _connection=None
        leftover = self.connection._connection
        if leftover:
            leftover.disconnect()
| OLD | NEW |