Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 | |
| 7 from google.appengine.api import app_identity | |
| 6 from google.appengine.ext import ndb | 8 from google.appengine.ext import ndb |
| 7 | 9 |
| 8 from components import metrics | 10 from components import metrics |
| 9 from components import utils | 11 from components import utils |
| 12 import gae_ts_mon | |
| 10 | 13 |
| 11 import config | 14 import config |
| 12 import model | 15 import model |
| 13 | 16 |
| 17 # TODO(nodir): remove Cloud Monitoring and refactor | |
| 18 # when gae_ts_mon is stabilized | |
| 19 | |
| 14 LABEL_BUCKET = 'buildbucket/bucket' | 20 LABEL_BUCKET = 'buildbucket/bucket' |
| 15 COMMON_LABELS = { | 21 COMMON_LABELS = { |
| 16 LABEL_BUCKET: 'Bucket' | 22 LABEL_BUCKET: 'Bucket' |
| 17 } | 23 } |
| 18 METRIC_PENDING_BUILDS = metrics.Descriptor( | 24 METRIC_PENDING_BUILDS = metrics.Descriptor( |
| 19 name='buildbucket/builds/pending', | 25 name='buildbucket/builds/pending', |
| 20 description='Number of pending builds', | 26 description='Number of pending builds', |
| 21 labels=COMMON_LABELS, | 27 labels=COMMON_LABELS, |
| 22 ) | 28 ) |
| 23 METRIC_RUNNING_BUILDS = metrics.Descriptor( | 29 METRIC_RUNNING_BUILDS = metrics.Descriptor( |
| (...skipping 12 matching lines...) | |
| 36 METRIC_SCHEDULING_LATENCY = metrics.Descriptor( | 42 METRIC_SCHEDULING_LATENCY = metrics.Descriptor( |
| 37 name='buildbucket/builds/scheduling_latency', | 43 name='buildbucket/builds/scheduling_latency', |
| 38 description=( | 44 description=( |
| 39 'Average number of seconds for a scheduled build ' | 45 'Average number of seconds for a scheduled build ' |
| 40 'to remain in SCHEDULED leased state' | 46 'to remain in SCHEDULED leased state' |
| 41 ), | 47 ), |
| 42 value_type='double', | 48 value_type='double', |
| 43 labels=COMMON_LABELS, | 49 labels=COMMON_LABELS, |
| 44 ) | 50 ) |
| 45 | 51 |
| 52 # gae_ts_mon | |
| 53 FIELD_BUCKET = 'bucket' | |
| 54 COMMON_FIELDS = { | |
| 55 'buildbucket_hostname': app_identity.get_default_version_hostname(), | |
| 56 } | |
| 57 | |
| 58 | |
| 59 def _def_metric(metric_type, name, description): | |
| 60 return metric_type( | |
| 61 '/buildbucket/%s' % name, | |
|
Sergey Berezin
2015/12/17 00:08:01
Remove the leading slash: just 'buildbucket/%s'.
nodir
2015/12/17 01:28:54
Done.
| |
| 62 fields=COMMON_FIELDS, | |
| 63 description=description) | |
| 64 | |
| 65 | |
| 66 CREATE_COUNT = _def_metric( | |
| 67 gae_ts_mon.CounterMetric, | |
| 68 'created', | |
|
Sergey Berezin
2015/12/17 00:08:01
nit: I'd make the metric name more descriptive, e.g. ...
nodir
2015/12/17 01:28:54
Done
| |
| 69 'Build creation', | |
| 70 ) | |
| 71 START_COUNT = _def_metric( | |
| 72 gae_ts_mon.CounterMetric, | |
| 73 'started', | |
| 74 'Build start', | |
| 75 ) | |
| 76 COMPLETE_COUNT = _def_metric( | |
| 77 gae_ts_mon.CounterMetric, | |
| 78 'completed', | |
| 79 'Build completion, including success, failure and cancellation' | |
| 80 ) | |
| 81 HEARTBEAT_FAILURE_COUNT = _def_metric( | |
| 82 gae_ts_mon.CounterMetric, | |
| 83 'heartbeat_failures', | |
| 84 'Failures to extend a build lease' | |
| 85 ) | |
| 86 LEASE_COUNT = _def_metric( | |
| 87 gae_ts_mon.CounterMetric, | |
| 88 'leases', | |
| 89 'Successful build lease extension', | |
| 90 ) | |
| 91 LEASE_EXPIRATION_COUNT = _def_metric( | |
| 92 gae_ts_mon.CounterMetric, | |
| 93 'lease_expired', | |
| 94 'Build lease expirations' | |
| 95 ) | |
| 96 CURRENTLY_PENDING = _def_metric( | |
| 97 gae_ts_mon.GaugeMetric, | |
| 98 'pending', | |
| 99 'Number of pending builds', | |
| 100 ) | |
| 101 CURRENTLY_RUNNING = _def_metric( | |
| 102 gae_ts_mon.GaugeMetric, | |
| 103 'running', | |
| 104 'Number of running builds' | |
| 105 ) | |
| 106 LEASE_LATENCY = _def_metric( | |
| 107 gae_ts_mon.NonCumulativeDistributionMetric, | |
|
Sergey Berezin
2015/12/17 00:08:01
I think this should be CumulativeDistributionMetric...
nodir
2015/12/17 01:28:54
FWIU, you are suggesting to do
LEASE_LATENCY.add(
Sergey Berezin
2015/12/17 02:30:18
It's a valid point; e.g. CQ measures both complete
nodir
2015/12/17 03:19:56
Renamed to never_leased_duration and scheduling_duration...
| |
| 108 'lease_latency', | |
| 109 'Duration between a build is created and it is leased for the first time', | |
| 110 ) | |
| 111 SCHEDULING_LATENCY = _def_metric( | |
| 112 gae_ts_mon.NonCumulativeDistributionMetric, | |
| 113 'scheduling_latency', | |
| 114 'Duration of a build being in SCHEDULED state', | |
| 115 ) | |
| 116 | |
| 117 | |
| 118 GAUGE_OF_CLOUD_METRIC = { | |
| 119 METRIC_PENDING_BUILDS: CURRENTLY_PENDING, | |
| 120 METRIC_RUNNING_BUILDS: CURRENTLY_RUNNING, | |
| 121 } | |
| 122 DISTRIBUTION_OF_CLOUD_METRIC = { | |
| 123 METRIC_LEASE_BUILD_LATENCY: LEASE_LATENCY, | |
| 124 METRIC_SCHEDULING_LATENCY: SCHEDULING_LATENCY, | |
| 125 } | |
| 126 | |
| 127 | |
| 128 def increment(metric, build, **fields): | |
| 129 fields = { | |
| 130 k: str(v) | |
| 131 for k, v in fields.iteritems() | |
| 132 if v is not None | |
|
Sergey Berezin
2015/12/17 00:08:01
Don't skip fields dynamically. ts_mon requires all
nodir
2015/12/17 01:28:54
Done.
| |
| 133 } | |
| 134 fields.setdefault(FIELD_BUCKET, build.bucket if build else '<no bucket>') | |
| 135 if build: | |
| 136 for t in build.tags: | |
| 137 k, v = t.split(':', 1) | |
| 138 fields.setdefault('tag_%s' % k, v) | |
|
Sergey Berezin
2015/12/17 00:08:01
How many different tags are there? Just checking t
nodir
2015/12/17 01:28:54
made them static
| |
| 139 metric.increment(fields) | |
| 140 | |
| 46 | 141 |
| 47 def set_gauge(buf, bucket, metric, value): | 142 def set_gauge(buf, bucket, metric, value): |
| 48 logging.info('Bucket %s: %s = %d', bucket, metric.name, value) | 143 logging.info('Bucket %s: %s = %d', bucket, metric.name, value) |
| 49 buf.set_gauge(metric, value, {LABEL_BUCKET: bucket}) | 144 buf.set_gauge(metric, value, {LABEL_BUCKET: bucket}) |
| 145 gae_ts_mon_metric = GAUGE_OF_CLOUD_METRIC.get(metric) | |
| 146 if gae_ts_mon_metric: | |
| 147 gae_ts_mon_metric.set(value, {FIELD_BUCKET: bucket}) | |
| 50 | 148 |
| 51 | 149 |
| 52 @ndb.tasklet | 150 @ndb.tasklet |
| 53 def send_build_status_metric(buf, bucket, metric, status): | 151 def send_build_status_metric(buf, bucket, metric, status): |
| 54 q = model.Build.query( | 152 q = model.Build.query( |
| 55 model.Build.bucket == bucket, | 153 model.Build.bucket == bucket, |
| 56 model.Build.status == status) | 154 model.Build.status == status) |
| 57 value = yield q.count_async() | 155 value = yield q.count_async() |
| 58 set_gauge(buf, bucket, metric, value) | 156 set_gauge(buf, bucket, metric, value) |
| 59 | 157 |
| 60 | 158 |
| 61 @ndb.tasklet | 159 @ndb.tasklet |
| 62 def send_build_latency(buf, metric, bucket, must_be_never_leased): | 160 def send_build_latency(buf, metric, bucket, must_be_never_leased): |
| 63 q = model.Build.query( | 161 q = model.Build.query( |
| 64 model.Build.bucket == bucket, | 162 model.Build.bucket == bucket, |
| 65 model.Build.status == model.BuildStatus.SCHEDULED, | 163 model.Build.status == model.BuildStatus.SCHEDULED, |
| 66 ) | 164 ) |
| 67 if must_be_never_leased: | 165 if must_be_never_leased: |
| 68 q = q.filter(model.Build.never_leased == True) | 166 q = q.filter(model.Build.never_leased == True) |
| 69 else: | 167 else: |
| 70 # Reuse the index that has never_leased | 168 # Reuse the index that has never_leased |
| 71 q = q.filter(model.Build.never_leased.IN((True, False))) | 169 q = q.filter(model.Build.never_leased.IN((True, False))) |
| 72 | 170 |
| 73 now = utils.utcnow() | 171 now = utils.utcnow() |
| 74 avg_latency = 0.0 | 172 avg_latency = 0.0 |
| 75 count = 0 | 173 count = 0 |
| 174 dist = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer()) | |
| 76 for e in q.iter(projection=[model.Build.create_time]): | 175 for e in q.iter(projection=[model.Build.create_time]): |
| 77 avg_latency += (now - e.create_time).total_seconds() | 176 latency = (now - e.create_time).total_seconds() |
| 177 dist.add(latency) | |
| 178 avg_latency += latency | |
| 78 count += 1 | 179 count += 1 |
| 79 if count > 0: | 180 if count > 0: |
| 80 avg_latency /= count | 181 avg_latency /= count |
| 81 set_gauge(buf, bucket, metric, avg_latency) | 182 set_gauge(buf, bucket, metric, avg_latency) |
| 183 DISTRIBUTION_OF_CLOUD_METRIC[metric].set(dist, {FIELD_BUCKET: bucket}) | |
| 82 | 184 |
| 83 | 185 |
| 84 def send_all_metrics(): | 186 def send_all_metrics(): |
| 85 buf = metrics.Buffer() | 187 buf = metrics.Buffer() |
| 86 futures = [] | 188 futures = [] |
| 87 for b in config.get_buckets_async().get_result(): | 189 for b in config.get_buckets_async().get_result(): |
| 88 futures.extend([ | 190 futures.extend([ |
| 89 send_build_status_metric( | 191 send_build_status_metric( |
| 90 buf, b.name, METRIC_PENDING_BUILDS, model.BuildStatus.SCHEDULED), | 192 buf, b.name, METRIC_PENDING_BUILDS, model.BuildStatus.SCHEDULED), |
| 91 send_build_status_metric( | 193 send_build_status_metric( |
| 92 buf, b.name, METRIC_RUNNING_BUILDS, model.BuildStatus.STARTED), | 194 buf, b.name, METRIC_RUNNING_BUILDS, model.BuildStatus.STARTED), |
| 93 send_build_latency(buf, METRIC_LEASE_BUILD_LATENCY, b.name, True), | 195 send_build_latency(buf, METRIC_LEASE_BUILD_LATENCY, b.name, True), |
| 94 send_build_latency(buf, METRIC_SCHEDULING_LATENCY, b.name, False), | 196 send_build_latency(buf, METRIC_SCHEDULING_LATENCY, b.name, False), |
| 95 ]) | 197 ]) |
| 96 ndb.Future.wait_all(futures) | 198 ndb.Future.wait_all(futures) |
| 97 buf.flush() | 199 buf.flush() |
| 98 for f in futures: | 200 for f in futures: |
| 99 f.check_success() | 201 f.check_success() |
| OLD | NEW |