Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Side by Side Diff: appengine/cr-buildbucket/metrics.py

Issue 1532713002: buildbucket: add monitoring using ts_mon (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: duration Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/cr-buildbucket/main.py ('k') | appengine/cr-buildbucket/module-backend.yaml » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6
7 from google.appengine.api import app_identity
6 from google.appengine.ext import ndb 8 from google.appengine.ext import ndb
7 9
8 from components import metrics 10 from components import metrics
9 from components import utils 11 from components import utils
12 import gae_ts_mon
10 13
11 import config 14 import config
12 import model 15 import model
13 16
17 # TODO(nodir): remove Cloud Monitoring and refactor
18 # when gae_ts_mon is stabilized
19
20 # TODO(nodir): move this list to luci-config
21 TAG_FIELDS = [
22 'builder',
23 'user_agent',
24 ]
25
14 LABEL_BUCKET = 'buildbucket/bucket' 26 LABEL_BUCKET = 'buildbucket/bucket'
15 COMMON_LABELS = { 27 COMMON_LABELS = {
16 LABEL_BUCKET: 'Bucket' 28 LABEL_BUCKET: 'Bucket'
17 } 29 }
18 METRIC_PENDING_BUILDS = metrics.Descriptor( 30 METRIC_PENDING_BUILDS = metrics.Descriptor(
19 name='buildbucket/builds/pending', 31 name='buildbucket/builds/pending',
20 description='Number of pending builds', 32 description='Number of pending builds',
21 labels=COMMON_LABELS, 33 labels=COMMON_LABELS,
22 ) 34 )
23 METRIC_RUNNING_BUILDS = metrics.Descriptor( 35 METRIC_RUNNING_BUILDS = metrics.Descriptor(
(...skipping 12 matching lines...) Expand all
36 METRIC_SCHEDULING_LATENCY = metrics.Descriptor( 48 METRIC_SCHEDULING_LATENCY = metrics.Descriptor(
37 name='buildbucket/builds/scheduling_latency', 49 name='buildbucket/builds/scheduling_latency',
38 description=( 50 description=(
39 'Average number of seconds for a scheduled build ' 51 'Average number of seconds for a scheduled build '
40 'to remain in SCHEDULED leased state' 52 'to remain in SCHEDULED leased state'
41 ), 53 ),
42 value_type='double', 54 value_type='double',
43 labels=COMMON_LABELS, 55 labels=COMMON_LABELS,
44 ) 56 )
45 57
58 # gae_ts_mon
59 FIELD_BUCKET = 'bucket'
60 COMMON_FIELDS = {
61 'buildbucket_hostname': app_identity.get_default_version_hostname(),
62 }
63
64
65 def _def_metric(metric_type, name, description):
66 return metric_type(
67 'buildbucket/%s' % name,
68 fields=COMMON_FIELDS,
69 description=description)
70
71
72 CREATE_COUNT = _def_metric(
73 gae_ts_mon.CounterMetric,
74 'builds/created',
75 'Build creation',
76 )
77 START_COUNT = _def_metric(
78 gae_ts_mon.CounterMetric,
79 'builds/started',
80 'Build start',
81 )
82 COMPLETE_COUNT = _def_metric(
83 gae_ts_mon.CounterMetric,
84 'builds/completed',
85 'Build completion, including success, failure and cancellation'
86 )
87 HEARTBEAT_COUNT = _def_metric(
88 gae_ts_mon.CounterMetric,
89 'builds/heartbeats',
90 'Failures to extend a build lease'
91 )
92 LEASE_COUNT = _def_metric(
93 gae_ts_mon.CounterMetric,
94 'builds/leases',
95 'Successful build lease extension',
96 )
97 LEASE_EXPIRATION_COUNT = _def_metric(
98 gae_ts_mon.CounterMetric,
99 'builds/lease_expired',
100 'Build lease expirations'
101 )
102 CURRENTLY_PENDING = _def_metric(
103 gae_ts_mon.GaugeMetric,
104 'builds/pending',
105 'Number of pending builds',
106 )
107 CURRENTLY_RUNNING = _def_metric(
108 gae_ts_mon.GaugeMetric,
109 'builds/running',
110 'Number of running builds'
111 )
112 LEASE_LATENCY = _def_metric(
113 gae_ts_mon.NonCumulativeDistributionMetric,
114 'builds/never_leased_duration',
115 'Duration between a build is created and it is leased for the first time',
116 )
117 SCHEDULING_LATENCY = _def_metric(
118 gae_ts_mon.NonCumulativeDistributionMetric,
119 'builds/scheduling_duration',
120 'Duration of a build remaining in SCHEDULED state',
121 )
122
123
124 GAUGE_OF_CLOUD_METRIC = {
125 METRIC_PENDING_BUILDS: CURRENTLY_PENDING,
126 METRIC_RUNNING_BUILDS: CURRENTLY_RUNNING,
127 }
128 DISTRIBUTION_OF_CLOUD_METRIC = {
129 METRIC_LEASE_BUILD_LATENCY: LEASE_LATENCY,
130 METRIC_SCHEDULING_LATENCY: SCHEDULING_LATENCY,
131 }
132
133 def fields_for(build, **extra):
134 fields = extra
135 fields.setdefault(FIELD_BUCKET, build.bucket if build else '<no bucket>')
136 if build: # pragma: no branch
137 tags = dict(t.split(':', 1) for t in build.tags)
138 for t in TAG_FIELDS:
139 fields.setdefault(t, tags.get(t))
140 return fields
141
142
143 def increment(metric, build, **fields): # pragma: no cover
144 """Increments a counter metric."""
145 metric.increment(fields_for(build, **fields))
146
46 147
47 def set_gauge(buf, bucket, metric, value): 148 def set_gauge(buf, bucket, metric, value):
48 logging.info('Bucket %s: %s = %d', bucket, metric.name, value) 149 logging.info('Bucket %s: %s = %d', bucket, metric.name, value)
49 buf.set_gauge(metric, value, {LABEL_BUCKET: bucket}) 150 buf.set_gauge(metric, value, {LABEL_BUCKET: bucket})
151 gae_ts_mon_metric = GAUGE_OF_CLOUD_METRIC.get(metric)
152 if gae_ts_mon_metric:
153 gae_ts_mon_metric.set(value, {FIELD_BUCKET: bucket})
50 154
51 155
52 @ndb.tasklet 156 @ndb.tasklet
53 def send_build_status_metric(buf, bucket, metric, status): 157 def send_build_status_metric(buf, bucket, metric, status):
54 q = model.Build.query( 158 q = model.Build.query(
55 model.Build.bucket == bucket, 159 model.Build.bucket == bucket,
56 model.Build.status == status) 160 model.Build.status == status)
57 value = yield q.count_async() 161 value = yield q.count_async()
58 set_gauge(buf, bucket, metric, value) 162 set_gauge(buf, bucket, metric, value)
59 163
60 164
61 @ndb.tasklet 165 @ndb.tasklet
62 def send_build_latency(buf, metric, bucket, must_be_never_leased): 166 def send_build_latency(buf, metric, bucket, must_be_never_leased):
63 q = model.Build.query( 167 q = model.Build.query(
64 model.Build.bucket == bucket, 168 model.Build.bucket == bucket,
65 model.Build.status == model.BuildStatus.SCHEDULED, 169 model.Build.status == model.BuildStatus.SCHEDULED,
66 ) 170 )
67 if must_be_never_leased: 171 if must_be_never_leased:
68 q = q.filter(model.Build.never_leased == True) 172 q = q.filter(model.Build.never_leased == True)
69 else: 173 else:
70 # Reuse the index that has never_leased 174 # Reuse the index that has never_leased
71 q = q.filter(model.Build.never_leased.IN((True, False))) 175 q = q.filter(model.Build.never_leased.IN((True, False)))
72 176
73 now = utils.utcnow() 177 now = utils.utcnow()
74 avg_latency = 0.0 178 avg_latency = 0.0
75 count = 0 179 count = 0
180 dist = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer())
76 for e in q.iter(projection=[model.Build.create_time]): 181 for e in q.iter(projection=[model.Build.create_time]):
77 avg_latency += (now - e.create_time).total_seconds() 182 latency = (now - e.create_time).total_seconds()
183 dist.add(latency)
184 avg_latency += latency
78 count += 1 185 count += 1
79 if count > 0: 186 if count > 0:
80 avg_latency /= count 187 avg_latency /= count
81 set_gauge(buf, bucket, metric, avg_latency) 188 set_gauge(buf, bucket, metric, avg_latency)
189 DISTRIBUTION_OF_CLOUD_METRIC[metric].set(dist, {FIELD_BUCKET: bucket})
82 190
83 191
84 def send_all_metrics(): 192 def send_all_metrics():
85 buf = metrics.Buffer() 193 buf = metrics.Buffer()
86 futures = [] 194 futures = []
87 for b in config.get_buckets_async().get_result(): 195 for b in config.get_buckets_async().get_result():
88 futures.extend([ 196 futures.extend([
89 send_build_status_metric( 197 send_build_status_metric(
90 buf, b.name, METRIC_PENDING_BUILDS, model.BuildStatus.SCHEDULED), 198 buf, b.name, METRIC_PENDING_BUILDS, model.BuildStatus.SCHEDULED),
91 send_build_status_metric( 199 send_build_status_metric(
92 buf, b.name, METRIC_RUNNING_BUILDS, model.BuildStatus.STARTED), 200 buf, b.name, METRIC_RUNNING_BUILDS, model.BuildStatus.STARTED),
93 send_build_latency(buf, METRIC_LEASE_BUILD_LATENCY, b.name, True), 201 send_build_latency(buf, METRIC_LEASE_BUILD_LATENCY, b.name, True),
94 send_build_latency(buf, METRIC_SCHEDULING_LATENCY, b.name, False), 202 send_build_latency(buf, METRIC_SCHEDULING_LATENCY, b.name, False),
95 ]) 203 ])
96 ndb.Future.wait_all(futures) 204 ndb.Future.wait_all(futures)
97 buf.flush() 205 buf.flush()
98 for f in futures: 206 for f in futures:
99 f.check_success() 207 f.check_success()
OLD | NEW
« no previous file with comments | « appengine/cr-buildbucket/main.py ('k') | appengine/cr-buildbucket/module-backend.yaml » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698