Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Side by Side Diff: appengine/swarming/ts_mon_metrics.py

Issue 2705273003: Roll infra_libs and gae_ts_mon in luci-py, and add field_specs to all metrics (Closed)
Patch Set: Rebase Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Timeseries metrics.""" 5 """Timeseries metrics."""
6 6
7 from collections import defaultdict 7 from collections import defaultdict
8 import datetime 8 import datetime
9 import itertools 9 import itertools
10 import json 10 import json
(...skipping 19 matching lines...) Expand all
30 # Override default target fields for app-global metrics. 30 # Override default target fields for app-global metrics.
31 TARGET_FIELDS = { 31 TARGET_FIELDS = {
32 'job_name': '', # module name 32 'job_name': '', # module name
33 'hostname': '', # version 33 'hostname': '', # version
34 'task_num': 0, # instance ID 34 'task_num': 0, # instance ID
35 } 35 }
36 36
37 # A custom bucketer with 12% resolution in the range of 1..10**5. 37 # A custom bucketer with 12% resolution in the range of 1..10**5.
38 # Used for job cycle times. 38 # Used for job cycle times.
39 _bucketer = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05, 39 _bucketer = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05,
40 num_finite_buckets=100) 40 num_finite_buckets=100)
41 41
42 # Regular (instance-local) metrics: jobs/completed and jobs/durations. 42 # Regular (instance-local) metrics: jobs/completed and jobs/durations.
43 # Both have the following metric fields: 43 # Both have the following metric fields:
44 # - project_id: e.g. 'chromium' 44 # - project_id: e.g. 'chromium'
45 # - subproject_id: e.g. 'blink'. Set to empty string if not used. 45 # - subproject_id: e.g. 'blink'. Set to empty string if not used.
46 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' 46 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>'
47 # for buildbot jobs. 47 # for buildbot jobs.
48 # - result: one of 'success', 'failure', or 'infra-failure'. 48 # - result: one of 'success', 'failure', or 'infra-failure'.
49 jobs_completed = gae_ts_mon.CounterMetric( 49 jobs_completed = gae_ts_mon.CounterMetric(
50 'jobs/completed', 50 'jobs/completed',
51 description='Number of completed jobs.') 51 'Number of completed jobs.', [
52 gae_ts_mon.StringField('spec_name'),
53 gae_ts_mon.StringField('project_id'),
54 gae_ts_mon.StringField('subproject_id'),
55 gae_ts_mon.StringField('result'),
56 ])
52 57
53 58
54 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( 59 jobs_durations = gae_ts_mon.CumulativeDistributionMetric(
55 'jobs/durations', bucketer=_bucketer, 60 'jobs/durations',
56 description='Cycle times of completed jobs, in seconds.') 61 'Cycle times of completed jobs, in seconds.', [
62 gae_ts_mon.StringField('spec_name'),
63 gae_ts_mon.StringField('project_id'),
64 gae_ts_mon.StringField('subproject_id'),
65 gae_ts_mon.StringField('result'),
66 ],
67 bucketer=_bucketer)
57 68
58 69
59 # Similar to jobs/completed and jobs/duration, but with a dedup field. 70 # Similar to jobs/completed and jobs/duration, but with a dedup field.
60 # - project_id: e.g. 'chromium' 71 # - project_id: e.g. 'chromium'
61 # - subproject_id: e.g. 'blink'. Set to empty string if not used. 72 # - subproject_id: e.g. 'blink'. Set to empty string if not used.
62 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' 73 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>'
63 # for buildbot jobs. 74 # for buildbot jobs.
64 # - deduped: boolean describing whether the job was deduped or not. 75 # - deduped: boolean describing whether the job was deduped or not.
65 jobs_requested = gae_ts_mon.CounterMetric( 76 jobs_requested = gae_ts_mon.CounterMetric(
66 'jobs/requested', 77 'jobs/requested',
67 description='Number of requested jobs over time.') 78 'Number of requested jobs over time.', [
79 gae_ts_mon.StringField('spec_name'),
80 gae_ts_mon.StringField('project_id'),
81 gae_ts_mon.StringField('subproject_id'),
82 gae_ts_mon.BooleanField('deduped'),
83 ])
68 84
69 85
70 # Swarming-specific metric. Metric fields: 86 # Swarming-specific metric. Metric fields:
71 # - project_id: e.g. 'chromium' 87 # - project_id: e.g. 'chromium'
72 # - subproject_id: e.g. 'blink'. Set to empty string if not used. 88 # - subproject_id: e.g. 'blink'. Set to empty string if not used.
73 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' 89 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>'
74 # for buildbot jobs. 90 # for buildbot jobs.
75 tasks_expired = gae_ts_mon.CounterMetric( 91 tasks_expired = gae_ts_mon.CounterMetric(
76 'swarming/tasks/expired', 92 'swarming/tasks/expired',
77 description='Number of expired tasks') 93 'Number of expired tasks', [
94 gae_ts_mon.StringField('spec_name'),
95 gae_ts_mon.StringField('project_id'),
96 gae_ts_mon.StringField('subproject_id'),
97 ])
78 98
79 # Global metric. Metric fields: 99 # Global metric. Metric fields:
80 # - project_id: e.g. 'chromium' 100 # - project_id: e.g. 'chromium'
81 # - subproject_id: e.g. 'blink'. Set to empty string if not used. 101 # - subproject_id: e.g. 'blink'. Set to empty string if not used.
82 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' 102 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>'
83 # for buildbot jobs. 103 # for buildbot jobs.
84 # Override target field: 104 # Override target field:
85 # - hostname: 'autogen:<executor_id>': name of the bot that executed a job, 105 # - hostname: 'autogen:<executor_id>': name of the bot that executed a job,
86 # or an empty string. e.g. 'autogen:swarm42-m4'. 106 # or an empty string. e.g. 'autogen:swarm42-m4'.
87 # Value should be 'pending' or 'running'. Completed / canceled jobs should not 107 # Value should be 'pending' or 'running'. Completed / canceled jobs should not
88 # send this metric. 108 # send this metric.
89 jobs_running = gae_ts_mon.BooleanMetric( 109 jobs_running = gae_ts_mon.BooleanMetric(
90 'jobs/running', 110 'jobs/running',
91 description='Presence metric for a running job.') 111 'Presence metric for a running job.', [
112 gae_ts_mon.StringField('spec_name'),
113 gae_ts_mon.StringField('project_id'),
114 gae_ts_mon.StringField('subproject_id'),
115 ])
92 116
93 # Global metric. Metric fields: 117 # Global metric. Metric fields:
94 # - project_id: e.g. 'chromium' 118 # - project_id: e.g. 'chromium'
95 # - subproject_id: e.g. 'blink'. Set to empty string if not used. 119 # - subproject_id: e.g. 'blink'. Set to empty string if not used.
96 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' 120 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>'
97 # for buildbot jobs. 121 # for buildbot jobs.
98 # - status: 'pending' or 'running'. 122 # - status: 'pending' or 'running'.
99 jobs_active = gae_ts_mon.GaugeMetric( 123 jobs_active = gae_ts_mon.GaugeMetric(
100 'jobs/active', 124 'jobs/active',
101 description='Number of running, pending or otherwise active jobs.') 125 'Number of running, pending or otherwise active jobs.', [
126 gae_ts_mon.StringField('spec_name'),
127 gae_ts_mon.StringField('project_id'),
128 gae_ts_mon.StringField('subproject_id'),
129 gae_ts_mon.StringField('status'),
130 ])
102 131
103 132
104 # Global metric. Target field: hostname = 'autogen:<executor_id>' (bot id). 133 # Global metric. Target field: hostname = 'autogen:<executor_id>' (bot id).
105 executors_pool = gae_ts_mon.StringMetric( 134 executors_pool = gae_ts_mon.StringMetric(
106 'executors/pool', 135 'executors/pool',
107 description='Pool name for a given job executor.') 136 'Pool name for a given job executor.',
137 None)
108 138
109 139
110 # Global metric. Target fields: 140 # Global metric. Target fields:
111 # - hostname = 'autogen:<executor_id>' (bot id). 141 # - hostname = 'autogen:<executor_id>' (bot id).
112 # Status value must be 'ready', 'running', or anything else, possibly 142 # Status value must be 'ready', 'running', or anything else, possibly
113 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or 143 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or
114 # 'dead'. 144 # 'dead'.
115 executors_status = gae_ts_mon.StringMetric( 145 executors_status = gae_ts_mon.StringMetric(
116 'executors/status', 146 'executors/status',
117 description=('Status of a job executor.')) 147 'Status of a job executor.',
148 None)
118 149
119 150
120 # Global metric. Target fields: 151 # Global metric. Target fields:
121 # - hostname = 'autogen:<executor_id>' (bot id). 152 # - hostname = 'autogen:<executor_id>' (bot id).
122 # Status value must be 'ready', 'running', or anything else, possibly 153 # Status value must be 'ready', 'running', or anything else, possibly
123 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or 154 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or
124 # 'dead'. 155 # 'dead'.
125 # Note that 'running' will report data as long as the job is running, 156 # Note that 'running' will report data as long as the job is running,
126 # so it is best to restrict data to status == 'pending.' 157 # so it is best to restrict data to status == 'pending.'
127 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric( 158 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric(
128 'jobs/pending_durations', bucketer=_bucketer, 159 'jobs/pending_durations',
129 description='Pending times of active jobs, in seconds.') 160 'Pending times of active jobs, in seconds.', [
161 gae_ts_mon.StringField('spec_name'),
162 gae_ts_mon.StringField('project_id'),
163 gae_ts_mon.StringField('subproject_id'),
164 gae_ts_mon.StringField('status'),
165 ],
166 bucketer=_bucketer)
130 167
131 168
132 # Global metric. Target fields: 169 # Global metric. Target fields:
133 # - hostname = 'autogen:<executor_id>' (bot id). 170 # - hostname = 'autogen:<executor_id>' (bot id).
134 # Status value must be 'ready', 'running', or anything else, possibly 171 # Status value must be 'ready', 'running', or anything else, possibly
135 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or 172 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or
136 # 'dead'. 173 # 'dead'.
137 # Note that 'running' will report data as long as the job is running, 174 # Note that 'running' will report data as long as the job is running,
138 # so it is best to restrict data to status == 'pending.' 175 # so it is best to restrict data to status == 'pending.'
139 jobs_max_pending_duration = gae_ts_mon.FloatMetric( 176 jobs_max_pending_duration = gae_ts_mon.FloatMetric(
140 'jobs/max_pending_duration', 177 'jobs/max_pending_duration',
141 description='Maximum pending seconds of pending jobs.') 178 'Maximum pending seconds of pending jobs.', [
179 gae_ts_mon.StringField('spec_name'),
180 gae_ts_mon.StringField('project_id'),
181 gae_ts_mon.StringField('subproject_id'),
182 gae_ts_mon.StringField('status'),
183 ])
142 184
143 185
144 # Global metric. Target fields: 186 # Global metric. Target fields:
145 # - machine_type = server.lease_management.MachineType.key.id(). 187 # - machine_type = server.lease_management.MachineType.key.id().
146 # - enabled = server.lease_management.MachineType.enabled. 188 # - enabled = server.lease_management.MachineType.enabled.
147 machine_types_target_size = gae_ts_mon.GaugeMetric( 189 machine_types_target_size = gae_ts_mon.GaugeMetric(
148 'swarming/machine_types/target_size', 190 'swarming/machine_types/target_size',
149 description='Target size of each MachineType leased from Machine Provider.') 191 'Target size of each MachineType leased from Machine Provider.', [
192 gae_ts_mon.BooleanField('enabled'),
193 gae_ts_mon.StringField('machine_type'),
194 ])
150 195
151 196
152 def pool_from_dimensions(dimensions): 197 def pool_from_dimensions(dimensions):
153 """Return a canonical string of flattened dimensions.""" 198 """Return a canonical string of flattened dimensions."""
154 iterables = (map(lambda x: '%s:%s' % (key, x), values) 199 iterables = (map(lambda x: '%s:%s' % (key, x), values)
155 for key, values in dimensions.iteritems() 200 for key, values in dimensions.iteritems()
156 if key not in IGNORED_DIMENSIONS) 201 if key not in IGNORED_DIMENSIONS)
157 return '|'.join(sorted(itertools.chain(*iterables))) 202 return '|'.join(sorted(itertools.chain(*iterables)))
158 203
159 204
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
375 gae_ts_mon.register_global_metrics([ 420 gae_ts_mon.register_global_metrics([
376 executors_pool, 421 executors_pool,
377 executors_status, 422 executors_status,
378 jobs_active, 423 jobs_active,
379 jobs_max_pending_duration, 424 jobs_max_pending_duration,
380 jobs_pending_durations, 425 jobs_pending_durations,
381 jobs_running, 426 jobs_running,
382 machine_types_target_size, 427 machine_types_target_size,
383 ]) 428 ])
384 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) 429 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics)
OLDNEW
« no previous file with comments | « appengine/swarming/swarming_bot/bot_code/bot_main.py ('k') | appengine/third_party/gae_ts_mon/README.md » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698