Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(765)

Side by Side Diff: scripts/master/monitoring_status_receiver.py

Issue 2102383003: Report the /json/varz metrics directly from buildbot. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@buildbot-tsmon-2
Patch Set: Rebase Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import collections
6 import time
7
5 from buildbot.status.base import StatusReceiverMultiService 8 from buildbot.status.base import StatusReceiverMultiService
6 from twisted.internet import task 9 from twisted.internet import defer, reactor, task
7 from twisted.python import log, threadpool 10 from twisted.python import log, threadpool
8 11
9 from infra_libs import ts_mon 12 from infra_libs import ts_mon
10 13
14 uptime = ts_mon.FloatMetric('buildbot/master/uptime',
15 description='Time (in seconds) since the master was started')
16 accepting_builds = ts_mon.BooleanMetric('buildbot/master/accepting_builds',
17 description='Whether the master\'s BuildRequestDistributor is running')
18
19 connected = ts_mon.GaugeMetric('buildbot/master/builders/connected_slaves',
20 description='Number of slaves currently connected, per builder')
21 current_builds = ts_mon.GaugeMetric('buildbot/master/builders/current_builds',
22 description='Number of builds currently running, per builder')
23 pending_builds = ts_mon.GaugeMetric('buildbot/master/builders/pending_builds',
24 description='Number of builds pending, per builder')
25 state = ts_mon.StringMetric('buildbot/master/builders/state',
26 description='State of this builder - building, idle, or offline')
27 total = ts_mon.GaugeMetric('buildbot/master/builders/total_slaves',
28 description='Number of slaves configured on this builder - connected or '
29 'not')
30
31 pool_queue = ts_mon.GaugeMetric('buildbot/master/thread_pool/queue',
32 description='Number of runnables queued in the database thread pool')
33 pool_waiting = ts_mon.GaugeMetric('buildbot/master/thread_pool/waiting',
34 description='Number of idle workers for the database thread pool')
35 pool_working = ts_mon.GaugeMetric('buildbot/master/thread_pool/working',
36 description='Number of running workers for the database thread pool')
37
38 SERVER_STARTED = time.time()
39
11 40
12 class MonitoringStatusReceiver(StatusReceiverMultiService): 41 class MonitoringStatusReceiver(StatusReceiverMultiService):
13 """Flushes ts_mon metrics once per minute.""" 42 """Flushes ts_mon metrics once per minute."""
14 43
15 def __init__(self): 44 def __init__(self):
16 StatusReceiverMultiService.__init__(self) 45 StatusReceiverMultiService.__init__(self)
17 self.status = None 46 self.status = None
18 self.thread_pool = threadpool.ThreadPool(1, 1) 47 self.thread_pool = threadpool.ThreadPool(1, 1)
19 self.loop = task.LoopingCall(self._flush) 48 self.loop = task.LoopingCall(self.updateMetricsAndFlush)
20 49
21 def startService(self): 50 def startService(self):
22 StatusReceiverMultiService.startService(self) 51 StatusReceiverMultiService.startService(self)
23 self.status = self.parent.getStatus() 52 self.status = self.parent.getStatus()
24 self.status.subscribe(self) 53 self.status.subscribe(self)
25 54
26 self.thread_pool.start() 55 self.thread_pool.start()
27 self.loop.start(60, now=False) 56 self.loop.start(60, now=False)
28 57
29 def stopService(self): 58 def stopService(self):
30 self.loop.stop() 59 self.loop.stop()
31 self.thread_pool.stop() 60 self.thread_pool.stop()
32 return StatusReceiverMultiService.stopService(self) 61 return StatusReceiverMultiService.stopService(self)
33 62
34 def _flush(self): 63 @defer.inlineCallbacks
35 self.thread_pool.callInThread(self._flush_and_log_exceptions) 64 def updateMetricsAndFlush(self):
65 try:
66 yield self.updateMetrics()
67 finally:
68 self.thread_pool.callInThread(self._flush_and_log_exceptions)
69
70 @defer.inlineCallbacks
71 def updateMetrics(self):
72 uptime.set(time.time() - SERVER_STARTED)
73 accepting_builds.set(bool(self.status.master.botmaster.brd.running))
74 pool = self.status.master.db.pool
75 pool_queue.set(pool.q.qsize())
76 pool_waiting.set(len(pool.waiters))
77 pool_working.set(len(pool.working))
78
79 for builder_name in self.status.getBuilderNames():
80 fields = {'builder': builder_name}
81 builder = self.status.getBuilder(builder_name)
82 slaves = builder.getSlaves()
83
84 connected.set(sum(1 for x in slaves if x.connected), fields=fields)
85 current_builds.set(len(builder.getCurrentBuilds()), fields=fields)
86 state.set(builder.currentBigState, fields=fields)
87 total.set(len(slaves), fields=fields)
88
89 # Get pending build requests directly from the db for all builders at
90 # once.
91 d = self.status.master.db.buildrequests.getBuildRequests(claimed=False)
92
93 # Timeout the database request after 5 seconds.
94 def timeout():
95 if not d.called:
96 d.cancel()
97 reactor.callLater(5, timeout)
98
99 try:
100 brdicts = yield d
101 except Exception as ex:
102 log.err(ex, 'getBuildRequests failed while failed populating metrics')
103 else:
104 pending_per_builder = collections.defaultdict(int)
105 for brdict in brdicts:
106 pending_per_builder[brdict['buildername']] += 1
107
108 for builder_name, count in pending_per_builder.iteritems():
109 pending_builds.set(count, fields={'builder': builder_name})
36 110
37 def _flush_and_log_exceptions(self): 111 def _flush_and_log_exceptions(self):
38 try: 112 try:
39 ts_mon.flush() 113 ts_mon.flush()
40 except Exception: 114 except Exception:
41 log.err(None, 'Automatic monitoring flush failed.') 115 log.err(None, 'Automatic monitoring flush failed.')
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698