Chromium Code Reviews | Index: appengine/cr-buildbucket/metrics.py |
| diff --git a/appengine/cr-buildbucket/metrics.py b/appengine/cr-buildbucket/metrics.py |
| index 3473d984961b11b00f2ecb7f29d821a15570f834..5f63c359896b3c2e9f8d7020501d8ee8c3804dcf 100644 |
| --- a/appengine/cr-buildbucket/metrics.py |
| +++ b/appengine/cr-buildbucket/metrics.py |
| @@ -3,14 +3,20 @@ |
| # found in the LICENSE file. |
| import logging |
| + |
| +from google.appengine.api import app_identity |
| from google.appengine.ext import ndb |
| from components import metrics |
| from components import utils |
| +import gae_ts_mon |
| import config |
| import model |
| +# TODO(nodir): remove Cloud Monitoring and refactor |
| +# when gae_ts_mon is stabilized |
| + |
| LABEL_BUCKET = 'buildbucket/bucket' |
| COMMON_LABELS = { |
| LABEL_BUCKET: 'Bucket' |
| @@ -43,10 +49,102 @@ METRIC_SCHEDULING_LATENCY = metrics.Descriptor( |
| labels=COMMON_LABELS, |
| ) |
| +# gae_ts_mon |
| +FIELD_BUCKET = 'bucket' |
| +COMMON_FIELDS = { |
| + 'buildbucket_hostname': app_identity.get_default_version_hostname(), |
| +} |
| + |
| + |
| +def _def_metric(metric_type, name, description): |
| + return metric_type( |
| + '/buildbucket/%s' % name, |
|
Sergey Berezin
2015/12/17 00:08:01
Remove the leading slash: just 'buildbucket/%s'.
nodir
2015/12/17 01:28:54
Done.
|
| + fields=COMMON_FIELDS, |
| + description=description) |
| + |
| + |
| +CREATE_COUNT = _def_metric( |
| + gae_ts_mon.CounterMetric, |
| + 'created', |
|
Sergey Berezin
2015/12/17 00:08:01
nit: I'd make the metric name more descriptive, e.g. …
nodir
2015/12/17 01:28:54
Done
|
| + 'Build creation', |
| +) |
| +START_COUNT = _def_metric( |
| + gae_ts_mon.CounterMetric, |
| + 'started', |
| + 'Build start', |
| +) |
| +COMPLETE_COUNT = _def_metric( |
| + gae_ts_mon.CounterMetric, |
| + 'completed', |
| + 'Build completion, including success, failure and cancellation' |
| +) |
| +HEARTBEAT_FAILURE_COUNT = _def_metric( |
| + gae_ts_mon.CounterMetric, |
| + 'heartbeat_failures', |
| + 'Failures to extend a build lease' |
| +) |
| +LEASE_COUNT = _def_metric( |
| + gae_ts_mon.CounterMetric, |
| + 'leases', |
| + 'Successful build lease extension', |
| +) |
| +LEASE_EXPIRATION_COUNT = _def_metric( |
| + gae_ts_mon.CounterMetric, |
| + 'lease_expired', |
| + 'Build lease expirations' |
| +) |
| +CURRENTLY_PENDING = _def_metric( |
| + gae_ts_mon.GaugeMetric, |
| + 'pending', |
| + 'Number of pending builds', |
| +) |
| +CURRENTLY_RUNNING = _def_metric( |
| + gae_ts_mon.GaugeMetric, |
| + 'running', |
| + 'Number of running builds' |
| +) |
| +LEASE_LATENCY = _def_metric( |
| + gae_ts_mon.NonCumulativeDistributionMetric, |
|
Sergey Berezin
2015/12/17 00:08:01
I think this should be CumulativeDistributionMetric…
nodir
2015/12/17 01:28:54
From what I understand, you are suggesting to do
LEASE_LATENCY.add(
Sergey Berezin
2015/12/17 02:30:18
It's a valid point; e.g. CQ measures both complete
nodir
2015/12/17 03:19:56
Renamed to never_leased_duration and scheduling_du
|
| + 'lease_latency', |
| + 'Duration between a build is created and it is leased for the first time', |
| +) |
| +SCHEDULING_LATENCY = _def_metric( |
| + gae_ts_mon.NonCumulativeDistributionMetric, |
| + 'scheduling_latency', |
| + 'Duration of a build being in SCHEDULED state', |
| +) |
| + |
| + |
| +GAUGE_OF_CLOUD_METRIC = { |
| + METRIC_PENDING_BUILDS: CURRENTLY_PENDING, |
| + METRIC_RUNNING_BUILDS: CURRENTLY_RUNNING, |
| +} |
| +DISTRIBUTION_OF_CLOUD_METRIC = { |
| + METRIC_LEASE_BUILD_LATENCY: LEASE_LATENCY, |
| + METRIC_SCHEDULING_LATENCY: SCHEDULING_LATENCY, |
| +} |
| + |
| + |
| +def increment(metric, build, **fields): |
| + fields = { |
| + k: str(v) |
| + for k, v in fields.iteritems() |
| + if v is not None |
|
Sergey Berezin
2015/12/17 00:08:01
Don't skip fields dynamically. ts_mon requires all
nodir
2015/12/17 01:28:54
Done.
|
| + } |
| + fields.setdefault(FIELD_BUCKET, build.bucket if build else '<no bucket>') |
| + if build: |
| + for t in build.tags: |
| + k, v = t.split(':', 1) |
| + fields.setdefault('tag_%s' % k, v) |
|
Sergey Berezin
2015/12/17 00:08:01
How many different tags are there? Just checking t
nodir
2015/12/17 01:28:54
made them static
|
| + metric.increment(fields) |
| + |
| def set_gauge(buf, bucket, metric, value): |
| logging.info('Bucket %s: %s = %d', bucket, metric.name, value) |
| buf.set_gauge(metric, value, {LABEL_BUCKET: bucket}) |
| + gae_ts_mon_metric = GAUGE_OF_CLOUD_METRIC.get(metric) |
| + if gae_ts_mon_metric: |
| + gae_ts_mon_metric.set(value, {FIELD_BUCKET: bucket}) |
| @ndb.tasklet |
| @@ -73,12 +171,16 @@ def send_build_latency(buf, metric, bucket, must_be_never_leased): |
| now = utils.utcnow() |
| avg_latency = 0.0 |
| count = 0 |
| + dist = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer()) |
| for e in q.iter(projection=[model.Build.create_time]): |
| - avg_latency += (now - e.create_time).total_seconds() |
| + latency = (now - e.create_time).total_seconds() |
| + dist.add(latency) |
| + avg_latency += latency |
| count += 1 |
| if count > 0: |
| avg_latency /= count |
| set_gauge(buf, bucket, metric, avg_latency) |
| + DISTRIBUTION_OF_CLOUD_METRIC[metric].set(dist, {FIELD_BUCKET: bucket}) |
| def send_all_metrics(): |