| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import collections |
| 6 import time |
| 7 |
| 5 from buildbot.status.base import StatusReceiverMultiService | 8 from buildbot.status.base import StatusReceiverMultiService |
| 6 from twisted.internet import task | 9 from twisted.internet import defer, reactor, task |
| 7 from twisted.python import log, threadpool | 10 from twisted.python import log, threadpool |
| 8 | 11 |
| 9 from infra_libs import ts_mon | 12 from infra_libs import ts_mon |
| 10 | 13 |
# Master-wide metrics.
uptime = ts_mon.FloatMetric(
    'buildbot/master/uptime',
    description='Time (in seconds) since the master was started')
accepting_builds = ts_mon.BooleanMetric(
    'buildbot/master/accepting_builds',
    description="Whether the master's BuildRequestDistributor is running")

# Per-builder metrics.
connected = ts_mon.GaugeMetric(
    'buildbot/master/builders/connected_slaves',
    description='Number of slaves currently connected, per builder')
current_builds = ts_mon.GaugeMetric(
    'buildbot/master/builders/current_builds',
    description='Number of builds currently running, per builder')
pending_builds = ts_mon.GaugeMetric(
    'buildbot/master/builders/pending_builds',
    description='Number of builds pending, per builder')
state = ts_mon.StringMetric(
    'buildbot/master/builders/state',
    description='State of this builder - building, idle, or offline')
total = ts_mon.GaugeMetric(
    'buildbot/master/builders/total_slaves',
    description=(
        'Number of slaves configured on this builder - connected or not'))

# Database thread pool metrics.
pool_queue = ts_mon.GaugeMetric(
    'buildbot/master/thread_pool/queue',
    description='Number of runnables queued in the database thread pool')
pool_waiting = ts_mon.GaugeMetric(
    'buildbot/master/thread_pool/waiting',
    description='Number of idle workers for the database thread pool')
pool_working = ts_mon.GaugeMetric(
    'buildbot/master/thread_pool/working',
    description='Number of running workers for the database thread pool')

# Wall-clock timestamp taken when this module is imported; the uptime metric
# is reported relative to it.
SERVER_STARTED = time.time()
| 39 |
| 11 | 40 |
class MonitoringStatusReceiver(StatusReceiverMultiService):
  """Updates buildbot master metrics and flushes ts_mon once per minute.

  Metrics are updated on the reactor thread; the flush itself runs on a
  dedicated single-worker thread pool.
  """

  def __init__(self):
    StatusReceiverMultiService.__init__(self)
    self.status = None
    # One worker only: every flush is queued on the same thread.
    self.thread_pool = threadpool.ThreadPool(1, 1)
    self.loop = task.LoopingCall(self.updateMetricsAndFlush)

  def startService(self):
    """Subscribes to the master's status and starts the periodic loop."""
    StatusReceiverMultiService.startService(self)
    self.status = self.parent.getStatus()
    self.status.subscribe(self)

    self.thread_pool.start()
    self.loop.start(60, now=False)

  def stopService(self):
    """Stops the periodic loop and the flush thread pool."""
    self.loop.stop()
    self.thread_pool.stop()
    return StatusReceiverMultiService.stopService(self)

  @defer.inlineCallbacks
  def updateMetricsAndFlush(self):
    """Updates all metrics, then schedules a flush on the thread pool.

    Failures while updating are logged rather than propagated: LoopingCall
    stops for good when its callable fails, which would silently end all
    further metric updates and flushes.  The flush is scheduled whether or
    not the update succeeded.
    """
    try:
      yield self.updateMetrics()
    except Exception:
      log.err(None, 'Updating monitoring metrics failed.')
    finally:
      self.thread_pool.callInThread(self._flush_and_log_exceptions)

  @defer.inlineCallbacks
  def updateMetrics(self):
    """Sets the master, thread pool and per-builder metrics.

    Returns a Deferred that fires once the pending build requests have been
    counted, or once that database query has failed or timed out.
    """
    uptime.set(time.time() - SERVER_STARTED)
    accepting_builds.set(bool(self.status.master.botmaster.brd.running))
    pool = self.status.master.db.pool
    pool_queue.set(pool.q.qsize())
    pool_waiting.set(len(pool.waiters))
    pool_working.set(len(pool.working))

    builder_names = list(self.status.getBuilderNames())
    for builder_name in builder_names:
      fields = {'builder': builder_name}
      builder = self.status.getBuilder(builder_name)
      slaves = builder.getSlaves()

      connected.set(sum(1 for x in slaves if x.connected), fields=fields)
      current_builds.set(len(builder.getCurrentBuilds()), fields=fields)
      state.set(builder.currentBigState, fields=fields)
      total.set(len(slaves), fields=fields)

    # Get pending build requests directly from the db for all builders at
    # once.
    d = self.status.master.db.buildrequests.getBuildRequests(claimed=False)

    # Timeout the database request after 5 seconds.
    def timeout():
      if not d.called:
        d.cancel()
    timeout_call = reactor.callLater(5, timeout)

    try:
      brdicts = yield d
    except Exception as ex:
      log.err(ex, 'getBuildRequests failed while populating metrics')
    else:
      pending_per_builder = collections.defaultdict(int)
      for brdict in brdicts:
        pending_per_builder[brdict['buildername']] += 1

      # Report an explicit zero for builders with nothing pending; otherwise
      # a builder whose queue has drained would never have its previous
      # non-zero value overwritten.
      for builder_name in builder_names:
        pending_per_builder.setdefault(builder_name, 0)

      for builder_name, count in pending_per_builder.items():
        pending_builds.set(count, fields={'builder': builder_name})
    finally:
      # Don't leave the timeout scheduled once the request has finished.
      if timeout_call.active():
        timeout_call.cancel()

  def _flush_and_log_exceptions(self):
    """Flushes ts_mon, logging (not raising) any failure.

    Runs on the thread pool, so an exception raised here would have no
    caller to handle it.
    """
    try:
      ts_mon.flush()
    except Exception:
      log.err(None, 'Automatic monitoring flush failed.')
| OLD | NEW |