Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The LUCI Authors. All rights reserved. | 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
| 4 | 4 |
| 5 """Timeseries metrics.""" | 5 """Timeseries metrics.""" |
| 6 | 6 |
| 7 from collections import defaultdict | 7 from collections import defaultdict |
| 8 import itertools | 8 import itertools |
| 9 | 9 |
| 10 from google.appengine.ext import ndb | 10 from google.appengine.ext import ndb |
| (...skipping 28 matching lines...) | |
| 39 jobs_completed = gae_ts_mon.CounterMetric( | 39 jobs_completed = gae_ts_mon.CounterMetric( |
| 40 'jobs/completed', | 40 'jobs/completed', |
| 41 description='Number of completed jobs.') | 41 description='Number of completed jobs.') |
| 42 | 42 |
| 43 | 43 |
| 44 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( | 44 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( |
| 45 'jobs/durations', bucketer=_bucketer, | 45 'jobs/durations', bucketer=_bucketer, |
| 46 description='Cycle times of completed jobs, in seconds.') | 46 description='Cycle times of completed jobs, in seconds.') |
| 47 | 47 |
| 48 | 48 |
| 49 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric( | |
| 50 'jobs/pending_durations', bucketer=_bucketer, | |
| 51 description='Pending times of active jobs, in seconds.') | |
| 52 | |
| 53 | |
| 49 # Swarming-specific metric. Metric fields: | 54 # Swarming-specific metric. Metric fields: |
| 50 # - project_id: e.g. 'chromium' | 55 # - project_id: e.g. 'chromium' |
| 51 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 56 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 52 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 57 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 53 # for buildbot jobs. | 58 # for buildbot jobs. |
| 54 tasks_expired = gae_ts_mon.CounterMetric( | 59 tasks_expired = gae_ts_mon.CounterMetric( |
| 55 'swarming/tasks/expired', | 60 'swarming/tasks/expired', |
| 56 description='Number of expired tasks') | 61 description='Number of expired tasks') |
| 57 | 62 |
| 58 # Global metric. Metric fields: | 63 # Global metric. Metric fields: |
| (...skipping 75 matching lines...) | |
| 134 fields['result'] = 'infra-failure' | 139 fields['result'] = 'infra-failure' |
| 135 elif task_result_summary.failure: | 140 elif task_result_summary.failure: |
| 136 fields['result'] = 'failure' | 141 fields['result'] = 'failure' |
| 137 else: | 142 else: |
| 138 fields['result'] = 'success' | 143 fields['result'] = 'success' |
| 139 jobs_completed.increment(fields=fields) | 144 jobs_completed.increment(fields=fields) |
| 140 jobs_durations.add(task_result_summary.duration, fields=fields) | 145 jobs_durations.add(task_result_summary.duration, fields=fields) |
| 141 | 146 |
| 142 | 147 |
| 143 @ndb.tasklet | 148 @ndb.tasklet |
| 144 def _set_jobs_metrics(): | 149 def _set_jobs_metrics(now): |
| 145 state_map = {task_result.State.RUNNING: 'running', | 150 state_map = {task_result.State.RUNNING: 'running', |
| 146 task_result.State.PENDING: 'pending'} | 151 task_result.State.PENDING: 'pending'} |
| 147 query_iter = task_result.get_result_summaries_query( | 152 query_iter = task_result.get_result_summaries_query( |
| 148 None, None, 'created_ts', 'pending_running', None).iter() | 153 None, None, 'created_ts', 'pending_running', None).iter() |
| 149 jobs_counts = defaultdict(lambda: 0) | 154 jobs_counts = defaultdict(lambda: 0) |
| 155 jobs_pending_distributions = defaultdict( | |
| 156 lambda: gae_ts_mon.Distribution(_bucketer)) | |
| 150 while (yield query_iter.has_next_async()): | 157 while (yield query_iter.has_next_async()): |
| 151 summary = query_iter.next() | 158 summary = query_iter.next() |
| 152 status = state_map.get(summary.state, '') | 159 status = state_map.get(summary.state, '') |
| 153 fields = extract_job_fields(summary.tags) | 160 fields = extract_job_fields(summary.tags) |
| 154 target_fields = dict(TARGET_FIELDS) | 161 target_fields = dict(TARGET_FIELDS) |
| 155 if summary.bot_id: | 162 if summary.bot_id: |
| 156 target_fields['hostname'] = 'autogen:' + summary.bot_id | 163 target_fields['hostname'] = 'autogen:' + summary.bot_id |
| 157 if summary.bot_id and status == 'running': | 164 if summary.bot_id and status == 'running': |
| 158 jobs_running.set(True, target_fields=target_fields, fields=fields) | 165 jobs_running.set(True, target_fields=target_fields, fields=fields) |
| 159 fields['status'] = status | 166 fields['status'] = status |
| 160 jobs_counts[tuple(sorted(fields.iteritems()))] += 1 | 167 |
| 168 key = tuple(sorted(fields.iteritems())) | |
| 169 | |
| 170 jobs_counts[key] += 1 | |
| 171 | |
| 172 pending_duration = summary.pending_now(now) | |
| 173 if pending_duration: | |
| 174 jobs_pending_distributions[key].add(pending_duration) | |
|
Sergey Berezin
2016/07/06 18:31:47
pending_duration.total_seconds()
Paweł Hajdan Jr.
2016/07/08 12:03:27
Done (the test also detected this).
| |
| 161 | 175 |
| 162 for key, count in jobs_counts.iteritems(): | 176 for key, count in jobs_counts.iteritems(): |
| 163 jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key)) | 177 jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key)) |
| 164 | 178 |
| 179 for key, distribution in jobs_pending_distributions.iteritems(): | |
| 180 jobs_pending_durations.set( | |
| 181 distribution, target_fields=TARGET_FIELDS, fields=dict(key)) | |
| 182 | |
| 165 | 183 |
| 166 @ndb.tasklet | 184 @ndb.tasklet |
| 167 def _set_executors_metrics(now): | 185 def _set_executors_metrics(now): |
| 168 query_iter = bot_management.BotInfo.query().iter() | 186 query_iter = bot_management.BotInfo.query().iter() |
| 169 while (yield query_iter.has_next_async()): | 187 while (yield query_iter.has_next_async()): |
| 170 bot_info = query_iter.next() | 188 bot_info = query_iter.next() |
| 171 status = 'ready' | 189 status = 'ready' |
| 172 if bot_info.task_id: | 190 if bot_info.task_id: |
| 173 status = 'running' | 191 status = 'running' |
| 174 elif bot_info.quarantined: | 192 elif bot_info.quarantined: |
| 175 status = 'quarantined' | 193 status = 'quarantined' |
| 176 elif bot_info.is_dead(now): | 194 elif bot_info.is_dead(now): |
| 177 status = 'dead' | 195 status = 'dead' |
| 178 | 196 |
| 179 target_fields = dict(TARGET_FIELDS) | 197 target_fields = dict(TARGET_FIELDS) |
| 180 target_fields['hostname'] = 'autogen:' + bot_info.id | 198 target_fields['hostname'] = 'autogen:' + bot_info.id |
| 181 | 199 |
| 182 executors_status.set(status, target_fields=target_fields) | 200 executors_status.set(status, target_fields=target_fields) |
| 183 executors_pool.set( | 201 executors_pool.set( |
| 184 pool_from_dimensions(bot_info.dimensions), | 202 pool_from_dimensions(bot_info.dimensions), |
| 185 target_fields=target_fields) | 203 target_fields=target_fields) |
| 186 | 204 |
| 187 | 205 |
| 188 @ndb.tasklet | 206 @ndb.tasklet |
| 189 def _set_global_metrics_async(now): | 207 def _set_global_metrics_async(now): |
| 190 yield _set_executors_metrics(now), _set_jobs_metrics() | 208 yield _set_executors_metrics(now), _set_jobs_metrics(now) |
| 191 | 209 |
| 192 | 210 |
| 193 def _set_global_metrics(now=None): | 211 def _set_global_metrics(now=None): |
| 194 if now is None: | 212 if now is None: |
| 195 now = utils.utcnow() | 213 now = utils.utcnow() |
| 196 _set_global_metrics_async(now).get_result() | 214 _set_global_metrics_async(now).get_result() |
| 197 | 215 |
| 198 | 216 |
| 199 def initialize(): | 217 def initialize(): |
| 200 gae_ts_mon.register_global_metrics( | 218 gae_ts_mon.register_global_metrics( |
| 201 [jobs_running, executors_pool, executors_status]) | 219 [jobs_running, executors_pool, executors_status]) |
| 202 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) | 220 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) |
| OLD | NEW |