| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 |
| 7 from google.appengine.api import app_identity |
| 6 from google.appengine.ext import ndb | 8 from google.appengine.ext import ndb |
| 7 | 9 |
| 8 from components import metrics | 10 from components import metrics |
| 9 from components import utils | 11 from components import utils |
| 12 import gae_ts_mon |
| 10 | 13 |
| 11 import config | 14 import config |
| 12 import model | 15 import model |
| 13 | 16 |
| 17 # TODO(nodir): remove Cloud Monitoring and refactor |
| 18 # when gae_ts_mon is stabilized |
| 19 |
| 20 # TODO(nodir): move this list to luci-config |
| 21 TAG_FIELDS = [ |
| 22 'builder', |
| 23 'user_agent', |
| 24 ] |
| 25 |
| 14 LABEL_BUCKET = 'buildbucket/bucket' | 26 LABEL_BUCKET = 'buildbucket/bucket' |
| 15 COMMON_LABELS = { | 27 COMMON_LABELS = { |
| 16 LABEL_BUCKET: 'Bucket' | 28 LABEL_BUCKET: 'Bucket' |
| 17 } | 29 } |
| 18 METRIC_PENDING_BUILDS = metrics.Descriptor( | 30 METRIC_PENDING_BUILDS = metrics.Descriptor( |
| 19 name='buildbucket/builds/pending', | 31 name='buildbucket/builds/pending', |
| 20 description='Number of pending builds', | 32 description='Number of pending builds', |
| 21 labels=COMMON_LABELS, | 33 labels=COMMON_LABELS, |
| 22 ) | 34 ) |
| 23 METRIC_RUNNING_BUILDS = metrics.Descriptor( | 35 METRIC_RUNNING_BUILDS = metrics.Descriptor( |
| (...skipping 12 matching lines...) Expand all Loading... |
| 36 METRIC_SCHEDULING_LATENCY = metrics.Descriptor( | 48 METRIC_SCHEDULING_LATENCY = metrics.Descriptor( |
| 37 name='buildbucket/builds/scheduling_latency', | 49 name='buildbucket/builds/scheduling_latency', |
| 38 description=( | 50 description=( |
| 39 'Average number of seconds for a scheduled build ' | 51 'Average number of seconds for a scheduled build ' |
| 40 'to remain in SCHEDULED leased state' | 52 'to remain in SCHEDULED leased state' |
| 41 ), | 53 ), |
| 42 value_type='double', | 54 value_type='double', |
| 43 labels=COMMON_LABELS, | 55 labels=COMMON_LABELS, |
| 44 ) | 56 ) |
| 45 | 57 |
# gae_ts_mon field names and default fields.
# FIELD_BUCKET is the field key under which a bucket name is reported.
FIELD_BUCKET = 'bucket'
# Fields passed to every metric created via _def_metric. The hostname is
# resolved once, when this module is imported.
COMMON_FIELDS = dict(
    buildbucket_hostname=app_identity.get_default_version_hostname(),
)
| 63 |
| 64 |
def _def_metric(metric_type, name, description):
  """Creates a metric whose name is prefixed with 'buildbucket/'.

  Args:
    metric_type: callable invoked as
        metric_type(full_name, fields=..., description=...).
    name: metric name suffix; it is appended to 'buildbucket/'.
    description: passed through to metric_type unchanged.

  Returns:
    Whatever metric_type returns.
  """
  full_name = 'buildbucket/%s' % name
  return metric_type(
      full_name, fields=COMMON_FIELDS, description=description)
| 70 |
| 71 |
# Counter metrics. They are updated elsewhere via increment().
CREATE_COUNT = _def_metric(
    metric_type=gae_ts_mon.CounterMetric,
    name='builds/created',
    description='Build creation',
)
START_COUNT = _def_metric(
    metric_type=gae_ts_mon.CounterMetric,
    name='builds/started',
    description='Build start',
)
COMPLETE_COUNT = _def_metric(
    metric_type=gae_ts_mon.CounterMetric,
    name='builds/completed',
    description=(
        'Build completion, including success, failure and cancellation'),
)
# NOTE(review): the description talks about failures while the metric name
# says "heartbeats" -- confirm which one is intended.
HEARTBEAT_COUNT = _def_metric(
    metric_type=gae_ts_mon.CounterMetric,
    name='builds/heartbeats',
    description='Failures to extend a build lease',
)
LEASE_COUNT = _def_metric(
    metric_type=gae_ts_mon.CounterMetric,
    name='builds/leases',
    description='Successful build lease extension',
)
LEASE_EXPIRATION_COUNT = _def_metric(
    metric_type=gae_ts_mon.CounterMetric,
    name='builds/lease_expired',
    description='Build lease expirations',
)

# Gauge metrics. They are set by set_gauge() through GAUGE_OF_CLOUD_METRIC.
CURRENTLY_PENDING = _def_metric(
    metric_type=gae_ts_mon.GaugeMetric,
    name='builds/pending',
    description='Number of pending builds',
)
CURRENTLY_RUNNING = _def_metric(
    metric_type=gae_ts_mon.GaugeMetric,
    name='builds/running',
    description='Number of running builds',
)

# Distribution metrics. They are set by send_build_latency() through
# DISTRIBUTION_OF_CLOUD_METRIC.
LEASE_LATENCY = _def_metric(
    metric_type=gae_ts_mon.NonCumulativeDistributionMetric,
    name='builds/never_leased_duration',
    description=(
        'Duration between a build is created and '
        'it is leased for the first time'),
)
SCHEDULING_LATENCY = _def_metric(
    metric_type=gae_ts_mon.NonCumulativeDistributionMetric,
    name='builds/scheduling_duration',
    description='Duration of a build remaining in SCHEDULED state',
)


# Maps a Cloud Monitoring gauge descriptor to its gae_ts_mon gauge.
GAUGE_OF_CLOUD_METRIC = {
  METRIC_PENDING_BUILDS: CURRENTLY_PENDING,
  METRIC_RUNNING_BUILDS: CURRENTLY_RUNNING,
}
# Maps a Cloud Monitoring latency descriptor to its gae_ts_mon distribution.
DISTRIBUTION_OF_CLOUD_METRIC = {
  METRIC_LEASE_BUILD_LATENCY: LEASE_LATENCY,
  METRIC_SCHEDULING_LATENCY: SCHEDULING_LATENCY,
}

| 132 |
def fields_for(build, **extra):
  """Builds the metric fields dict for a build.

  Values given explicitly in |extra| are never overwritten; only missing keys
  are filled in.

  Args:
    build: object with |bucket| and |tags| attributes, or a falsy value.
        Each tag is split on its first ':' into a key and a value.
    **extra: fields supplied by the caller.

  Returns:
    The |extra| dict, with FIELD_BUCKET defaulted to build.bucket (or
    '<no bucket>' when build is falsy) and, when build is truthy, one entry
    per name in TAG_FIELDS taken from the build's tags.
  """
  default_bucket = build.bucket if build else '<no bucket>'
  extra.setdefault(FIELD_BUCKET, default_bucket)
  if build:  # pragma: no branch
    tag_values = dict(tag.split(':', 1) for tag in build.tags)
    for field_name in TAG_FIELDS:
      # A tag missing from the build yields None here.
      # NOTE(review): confirm gae_ts_mon accepts None as a field value.
      extra.setdefault(field_name, tag_values.get(field_name))
  return extra
| 141 |
| 142 |
def increment(metric, build, **fields):  # pragma: no cover
  """Calls metric.increment with the fields computed for |build|.

  Args:
    metric: object exposing increment(fields).
    build: forwarded to fields_for.
    **fields: extra fields forwarded to fields_for.
  """
  metric_fields = fields_for(build, **fields)
  metric.increment(metric_fields)
| 146 |
| 46 | 147 |
def set_gauge(buf, bucket, metric, value):
  """Reports a per-bucket value to |buf| and to the matching ts_mon gauge.

  Args:
    buf: object exposing set_gauge(metric, value, labels).
    bucket: bucket name; used as the LABEL_BUCKET / FIELD_BUCKET value.
    metric: descriptor with a |name| attribute; also the lookup key in
        GAUGE_OF_CLOUD_METRIC.
    value: number to report. It is logged with %d.
  """
  logging.info('Bucket %s: %s = %d', bucket, metric.name, value)
  buf.set_gauge(metric, value, {LABEL_BUCKET: bucket})

  # Descriptors with no entry in GAUGE_OF_CLOUD_METRIC are not mirrored.
  ts_mon_gauge = GAUGE_OF_CLOUD_METRIC.get(metric)
  if not ts_mon_gauge:
    return
  ts_mon_gauge.set(value, {FIELD_BUCKET: bucket})
| 50 | 154 |
| 51 | 155 |
@ndb.tasklet
def send_build_status_metric(buf, bucket, metric, status):
  """Counts builds with |status| in |bucket| and reports it via set_gauge.

  Args:
    buf: forwarded to set_gauge.
    bucket: bucket name to filter builds by.
    metric: metric descriptor forwarded to set_gauge.
    status: build status to filter builds by.
  """
  build_count = yield model.Build.query(
      model.Build.bucket == bucket,
      model.Build.status == status,
  ).count_async()
  set_gauge(buf, bucket, metric, build_count)
| 59 | 163 |
| 60 | 164 |
@ndb.tasklet
def send_build_latency(buf, metric, bucket, must_be_never_leased):
  """Reports the age of SCHEDULED builds in |bucket|.

  The mean age in seconds goes to set_gauge. The distribution of ages goes to
  the gae_ts_mon metric mapped from |metric| in DISTRIBUTION_OF_CLOUD_METRIC.

  Args:
    buf: forwarded to set_gauge.
    metric: metric descriptor; must be a key of DISTRIBUTION_OF_CLOUD_METRIC.
    bucket: bucket name to filter builds by.
    must_be_never_leased: if truthy, only builds with never_leased == True
        are considered.
  """
  if must_be_never_leased:
    # The comparison operator builds the ndb filter, so "== True" stays.
    lease_filter = model.Build.never_leased == True
  else:
    # Reuse the index that has never_leased
    lease_filter = model.Build.never_leased.IN((True, False))
  scheduled_builds = model.Build.query(
      model.Build.bucket == bucket,
      model.Build.status == model.BuildStatus.SCHEDULED,
  ).filter(lease_filter)

  now = utils.utcnow()
  total_seconds = 0.0
  build_count = 0
  dist = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer())
  for build in scheduled_builds.iter(projection=[model.Build.create_time]):
    age = (now - build.create_time).total_seconds()
    dist.add(age)
    total_seconds += age
    build_count += 1

  # With no matching builds the reported average is 0.0.
  average = total_seconds / build_count if build_count > 0 else 0.0
  set_gauge(buf, bucket, metric, average)
  DISTRIBUTION_OF_CLOUD_METRIC[metric].set(dist, {FIELD_BUCKET: bucket})
| 82 | 190 |
| 83 | 191 |
def send_all_metrics():
  """Computes and sends the status and latency metrics of every bucket.

  All tasklets are started first, then waited on together. The buffer is
  flushed before the futures are checked for errors.
  """
  buf = metrics.Buffer()
  pending = []
  for bucket in config.get_buckets_async().get_result():
    name = bucket.name
    pending.append(send_build_status_metric(
        buf, name, METRIC_PENDING_BUILDS, model.BuildStatus.SCHEDULED))
    pending.append(send_build_status_metric(
        buf, name, METRIC_RUNNING_BUILDS, model.BuildStatus.STARTED))
    pending.append(
        send_build_latency(buf, METRIC_LEASE_BUILD_LATENCY, name, True))
    pending.append(
        send_build_latency(buf, METRIC_SCHEDULING_LATENCY, name, False))

  ndb.Future.wait_all(pending)
  buf.flush()
  for future in pending:
    future.check_success()
| OLD | NEW |