Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(299)

Unified Diff: appengine/cr-buildbucket/metrics.py

Issue 1532713002: buildbucket: add monitoring using ts_mon (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: duration Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/cr-buildbucket/main.py ('k') | appengine/cr-buildbucket/module-backend.yaml » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/cr-buildbucket/metrics.py
diff --git a/appengine/cr-buildbucket/metrics.py b/appengine/cr-buildbucket/metrics.py
index 3473d984961b11b00f2ecb7f29d821a15570f834..efc555090ee9541d4e36bde8c5e40455e97ee3b7 100644
--- a/appengine/cr-buildbucket/metrics.py
+++ b/appengine/cr-buildbucket/metrics.py
@@ -3,14 +3,26 @@
# found in the LICENSE file.
import logging
+
+from google.appengine.api import app_identity
from google.appengine.ext import ndb
from components import metrics
from components import utils
+import gae_ts_mon
import config
import model
+# TODO(nodir): remove Cloud Monitoring and refactor
+# when gae_ts_mon is stabilized
+
+# Keys of build tags ("key:value" strings) that fields_for copies into metric
+# fields; a build that lacks the tag gets None for that field.
+# TODO(nodir): move this list to luci-config
+TAG_FIELDS = [
+ 'builder',
+ 'user_agent',
+]
+
LABEL_BUCKET = 'buildbucket/bucket'
COMMON_LABELS = {
LABEL_BUCKET: 'Bucket'
@@ -43,10 +55,102 @@ METRIC_SCHEDULING_LATENCY = metrics.Descriptor(
labels=COMMON_LABELS,
)
+# gae_ts_mon
+# Name of the metric field that carries a build's bucket.
+FIELD_BUCKET = 'bucket'
+# Fields handed to every metric created by _def_metric. The hostname lookup
+# runs once, when this module is imported.
+COMMON_FIELDS = {
+ 'buildbucket_hostname': app_identity.get_default_version_hostname(),
+}
+
+
+def _def_metric(metric_type, name, description):
+  """Creates a metric of the given type named 'buildbucket/<name>'.
+
+  Args:
+    metric_type: metric class to instantiate, e.g. gae_ts_mon.CounterMetric.
+    name: metric name relative to the 'buildbucket/' prefix.
+    description: description string passed to the metric.
+
+  Returns:
+    The new metric, created with COMMON_FIELDS as its fields.
+  """
+  full_name = 'buildbucket/%s' % name
+  options = {'fields': COMMON_FIELDS, 'description': description}
+  return metric_type(full_name, **options)
+
+
+# Event counters (gae_ts_mon.CounterMetric), one per kind of build event.
+CREATE_COUNT = _def_metric(
+ gae_ts_mon.CounterMetric,
+ 'builds/created',
+ 'Build creation',
+)
+START_COUNT = _def_metric(
+ gae_ts_mon.CounterMetric,
+ 'builds/started',
+ 'Build start',
+)
+COMPLETE_COUNT = _def_metric(
+ gae_ts_mon.CounterMetric,
+ 'builds/completed',
+ 'Build completion, including success, failure and cancellation'
+)
+# NOTE(review): the name says "heartbeats" but the description says
+# "Failures to extend a build lease" -- confirm which events this counts.
+HEARTBEAT_COUNT = _def_metric(
+ gae_ts_mon.CounterMetric,
+ 'builds/heartbeats',
+ 'Failures to extend a build lease'
+)
+LEASE_COUNT = _def_metric(
+ gae_ts_mon.CounterMetric,
+ 'builds/leases',
+ 'Successful build lease extension',
+)
+LEASE_EXPIRATION_COUNT = _def_metric(
+ gae_ts_mon.CounterMetric,
+ 'builds/lease_expired',
+ 'Build lease expirations'
+)
+# Gauges (gae_ts_mon.GaugeMetric). set_gauge looks them up through
+# GAUGE_OF_CLOUD_METRIC and sets them per bucket.
+CURRENTLY_PENDING = _def_metric(
+ gae_ts_mon.GaugeMetric,
+ 'builds/pending',
+ 'Number of pending builds',
+)
+CURRENTLY_RUNNING = _def_metric(
+ gae_ts_mon.GaugeMetric,
+ 'builds/running',
+ 'Number of running builds'
+)
+# Latency distributions (gae_ts_mon.NonCumulativeDistributionMetric). They
+# are looked up through DISTRIBUTION_OF_CLOUD_METRIC and set per bucket with
+# a gae_ts_mon.Distribution of build latencies in seconds.
+LEASE_LATENCY = _def_metric(
+ gae_ts_mon.NonCumulativeDistributionMetric,
+ 'builds/never_leased_duration',
+ 'Duration between a build is created and it is leased for the first time',
+)
+SCHEDULING_LATENCY = _def_metric(
+ gae_ts_mon.NonCumulativeDistributionMetric,
+ 'builds/scheduling_duration',
+ 'Duration of a build remaining in SCHEDULED state',
+)
+
+
+# Maps each existing METRIC_* gauge descriptor to the gae_ts_mon gauge that
+# mirrors it. set_gauge uses .get(), so a metric missing here is skipped.
+GAUGE_OF_CLOUD_METRIC = {
+ METRIC_PENDING_BUILDS: CURRENTLY_PENDING,
+ METRIC_RUNNING_BUILDS: CURRENTLY_RUNNING,
+}
+# Maps each existing METRIC_* latency descriptor to the gae_ts_mon
+# distribution metric that mirrors it.
+DISTRIBUTION_OF_CLOUD_METRIC = {
+ METRIC_LEASE_BUILD_LATENCY: LEASE_LATENCY,
+ METRIC_SCHEDULING_LATENCY: SCHEDULING_LATENCY,
+}
+
+def fields_for(build, **extra):
+  """Returns the metric fields to report for |build|.
+
+  Args:
+    build: the build being reported, or a falsy value if there is none.
+    **extra: field values supplied by the caller; they take precedence over
+      anything derived from the build.
+
+  Returns:
+    A dict with FIELD_BUCKET set to build.bucket (or '<no bucket>' when there
+    is no build) and, when a build is given, one entry per name in
+    TAG_FIELDS taken from the build's "key:value" tags (None if the build
+    has no such tag).
+  """
+  fields = extra
+  fields.setdefault(FIELD_BUCKET, build.bucket if build else '<no bucket>')
+  if build:  # pragma: no branch
+    tags = {}
+    for tag in build.tags:
+      key, sep, value = tag.partition(':')
+      # A tag without ':' has no key/value split. Skip it instead of letting
+      # one malformed tag raise ValueError while reporting a metric.
+      if sep:
+        tags[key] = value
+    for name in TAG_FIELDS:
+      fields.setdefault(name, tags.get(name))
+  return fields
+
+
+def increment(metric, build, **fields):  # pragma: no cover
+  """Increments a counter metric with the fields computed for |build|.
+
+  Keyword arguments are forwarded to fields_for, where they take precedence
+  over the values derived from the build.
+  """
+  metric_fields = fields_for(build, **fields)
+  metric.increment(metric_fields)
+
def set_gauge(buf, bucket, metric, value):
+ """Logs a gauge value, records it in buf and mirrors it to gae_ts_mon.
+
+ The value is written to buf labelled with LABEL_BUCKET. The gae_ts_mon
+ gauge is set, with FIELD_BUCKET as its field, only if
+ GAUGE_OF_CLOUD_METRIC has an entry for metric.
+ """
+ # %d logs a float value (e.g. an average latency) truncated to an integer.
logging.info('Bucket %s: %s = %d', bucket, metric.name, value)
buf.set_gauge(metric, value, {LABEL_BUCKET: bucket})
+ # .get() yields None for metrics that have no gae_ts_mon counterpart.
+ gae_ts_mon_metric = GAUGE_OF_CLOUD_METRIC.get(metric)
+ if gae_ts_mon_metric:
+ gae_ts_mon_metric.set(value, {FIELD_BUCKET: bucket})
@ndb.tasklet
@@ -73,12 +177,16 @@ def send_build_latency(buf, metric, bucket, must_be_never_leased):
now = utils.utcnow()
avg_latency = 0.0
count = 0
+ dist = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer())
for e in q.iter(projection=[model.Build.create_time]):
- avg_latency += (now - e.create_time).total_seconds()
+ latency = (now - e.create_time).total_seconds()
+ dist.add(latency)
+ avg_latency += latency
count += 1
if count > 0:
avg_latency /= count
set_gauge(buf, bucket, metric, avg_latency)
+ DISTRIBUTION_OF_CLOUD_METRIC[metric].set(dist, {FIELD_BUCKET: bucket})
def send_all_metrics():
« no previous file with comments | « appengine/cr-buildbucket/main.py ('k') | appengine/cr-buildbucket/module-backend.yaml » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698