Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(263)

Side by Side Diff: appengine/swarming/ts_mon_metrics.py

Issue 2121323002: swarming: add active jobs pending times metric (Closed) Base URL: https://github.com/luci/luci-py.git@master
Patch Set: pylint Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Timeseries metrics.""" 5 """Timeseries metrics."""
6 6
7 from collections import defaultdict 7 from collections import defaultdict
8 import itertools 8 import itertools
9 9
10 from google.appengine.ext import ndb 10 from google.appengine.ext import ndb
(...skipping 28 matching lines...) Expand all
39 jobs_completed = gae_ts_mon.CounterMetric( 39 jobs_completed = gae_ts_mon.CounterMetric(
40 'jobs/completed', 40 'jobs/completed',
41 description='Number of completed jobs.') 41 description='Number of completed jobs.')
42 42
43 43
44 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( 44 jobs_durations = gae_ts_mon.CumulativeDistributionMetric(
45 'jobs/durations', bucketer=_bucketer, 45 'jobs/durations', bucketer=_bucketer,
46 description='Cycle times of completed jobs, in seconds.') 46 description='Cycle times of completed jobs, in seconds.')
47 47
48 48
49 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric(
50 'jobs/pending_durations', bucketer=_bucketer,
51 description='Pending times of active jobs, in seconds.')
52
53
49 # Swarming-specific metric. Metric fields: 54 # Swarming-specific metric. Metric fields:
50 # - project_id: e.g. 'chromium' 55 # - project_id: e.g. 'chromium'
51 # - subproject_id: e.g. 'blink'. Set to empty string if not used. 56 # - subproject_id: e.g. 'blink'. Set to empty string if not used.
52 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' 57 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>'
53 # for buildbot jobs. 58 # for buildbot jobs.
54 tasks_expired = gae_ts_mon.CounterMetric( 59 tasks_expired = gae_ts_mon.CounterMetric(
55 'swarming/tasks/expired', 60 'swarming/tasks/expired',
56 description='Number of expired tasks') 61 description='Number of expired tasks')
57 62
58 # Global metric. Metric fields: 63 # Global metric. Metric fields:
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 fields['result'] = 'infra-failure' 139 fields['result'] = 'infra-failure'
135 elif task_result_summary.failure: 140 elif task_result_summary.failure:
136 fields['result'] = 'failure' 141 fields['result'] = 'failure'
137 else: 142 else:
138 fields['result'] = 'success' 143 fields['result'] = 'success'
139 jobs_completed.increment(fields=fields) 144 jobs_completed.increment(fields=fields)
140 jobs_durations.add(task_result_summary.duration, fields=fields) 145 jobs_durations.add(task_result_summary.duration, fields=fields)
141 146
142 147
143 @ndb.tasklet 148 @ndb.tasklet
144 def _set_jobs_metrics(): 149 def _set_jobs_metrics(now):
145 state_map = {task_result.State.RUNNING: 'running', 150 state_map = {task_result.State.RUNNING: 'running',
146 task_result.State.PENDING: 'pending'} 151 task_result.State.PENDING: 'pending'}
147 query_iter = task_result.get_result_summaries_query( 152 query_iter = task_result.get_result_summaries_query(
148 None, None, 'created_ts', 'pending_running', None).iter() 153 None, None, 'created_ts', 'pending_running', None).iter()
149 jobs_counts = defaultdict(lambda: 0) 154 jobs_counts = defaultdict(lambda: 0)
155 jobs_pending_distributions = defaultdict(
156 lambda: gae_ts_mon.Distribution(_bucketer))
150 while (yield query_iter.has_next_async()): 157 while (yield query_iter.has_next_async()):
151 summary = query_iter.next() 158 summary = query_iter.next()
152 status = state_map.get(summary.state, '') 159 status = state_map.get(summary.state, '')
153 fields = extract_job_fields(summary.tags) 160 fields = extract_job_fields(summary.tags)
154 target_fields = dict(TARGET_FIELDS) 161 target_fields = dict(TARGET_FIELDS)
155 if summary.bot_id: 162 if summary.bot_id:
156 target_fields['hostname'] = 'autogen:' + summary.bot_id 163 target_fields['hostname'] = 'autogen:' + summary.bot_id
157 if summary.bot_id and status == 'running': 164 if summary.bot_id and status == 'running':
158 jobs_running.set(True, target_fields=target_fields, fields=fields) 165 jobs_running.set(True, target_fields=target_fields, fields=fields)
159 fields['status'] = status 166 fields['status'] = status
160 jobs_counts[tuple(sorted(fields.iteritems()))] += 1 167
168 key = tuple(sorted(fields.iteritems()))
169
170 jobs_counts[key] += 1
171
172 pending_duration = summary.pending_now(now)
173 if pending_duration:
174 jobs_pending_distributions[key].add(pending_duration)
Sergey Berezin 2016/07/06 18:31:47 pending_duration.total_seconds()
Paweł Hajdan Jr. 2016/07/08 12:03:27 Done (the test also detected this).
161 175
162 for key, count in jobs_counts.iteritems(): 176 for key, count in jobs_counts.iteritems():
163 jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key)) 177 jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key))
164 178
179 for key, distribution in jobs_pending_distributions.iteritems():
180 jobs_pending_durations.set(
181 distribution, target_fields=TARGET_FIELDS, fields=dict(key))
182
165 183
166 @ndb.tasklet 184 @ndb.tasklet
167 def _set_executors_metrics(now): 185 def _set_executors_metrics(now):
168 query_iter = bot_management.BotInfo.query().iter() 186 query_iter = bot_management.BotInfo.query().iter()
169 while (yield query_iter.has_next_async()): 187 while (yield query_iter.has_next_async()):
170 bot_info = query_iter.next() 188 bot_info = query_iter.next()
171 status = 'ready' 189 status = 'ready'
172 if bot_info.task_id: 190 if bot_info.task_id:
173 status = 'running' 191 status = 'running'
174 elif bot_info.quarantined: 192 elif bot_info.quarantined:
175 status = 'quarantined' 193 status = 'quarantined'
176 elif bot_info.is_dead(now): 194 elif bot_info.is_dead(now):
177 status = 'dead' 195 status = 'dead'
178 196
179 target_fields = dict(TARGET_FIELDS) 197 target_fields = dict(TARGET_FIELDS)
180 target_fields['hostname'] = 'autogen:' + bot_info.id 198 target_fields['hostname'] = 'autogen:' + bot_info.id
181 199
182 executors_status.set(status, target_fields=target_fields) 200 executors_status.set(status, target_fields=target_fields)
183 executors_pool.set( 201 executors_pool.set(
184 pool_from_dimensions(bot_info.dimensions), 202 pool_from_dimensions(bot_info.dimensions),
185 target_fields=target_fields) 203 target_fields=target_fields)
186 204
187 205
188 @ndb.tasklet 206 @ndb.tasklet
189 def _set_global_metrics_async(now): 207 def _set_global_metrics_async(now):
190 yield _set_executors_metrics(now), _set_jobs_metrics() 208 yield _set_executors_metrics(now), _set_jobs_metrics(now)
191 209
192 210
193 def _set_global_metrics(now=None): 211 def _set_global_metrics(now=None):
194 if now is None: 212 if now is None:
195 now = utils.utcnow() 213 now = utils.utcnow()
196 _set_global_metrics_async(now).get_result() 214 _set_global_metrics_async(now).get_result()
197 215
198 216
199 def initialize(): 217 def initialize():
200 gae_ts_mon.register_global_metrics( 218 gae_ts_mon.register_global_metrics(
201 [jobs_running, executors_pool, executors_status]) 219 [jobs_running, executors_pool, executors_status])
202 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) 220 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics)
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698