| OLD | NEW |
| 1 # Copyright 2016 The LUCI Authors. All rights reserved. | 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
| 4 | 4 |
| 5 """Timeseries metrics.""" | 5 """Timeseries metrics.""" |
| 6 | 6 |
| 7 from collections import defaultdict | 7 from collections import defaultdict |
| 8 import datetime | 8 import datetime |
| 9 import itertools | 9 import itertools |
| 10 import json | 10 import json |
| (...skipping 19 matching lines...) |
| 30 # Override default target fields for app-global metrics. | 30 # Override default target fields for app-global metrics. |
| 31 TARGET_FIELDS = { | 31 TARGET_FIELDS = { |
| 32 'job_name': '', # module name | 32 'job_name': '', # module name |
| 33 'hostname': '', # version | 33 'hostname': '', # version |
| 34 'task_num': 0, # instance ID | 34 'task_num': 0, # instance ID |
| 35 } | 35 } |
| 36 | 36 |
| 37 # A custom bucketer with 12% resolution in the range of 1..10**5. | 37 # A custom bucketer with 12% resolution in the range of 1..10**5. |
| 38 # Used for job cycle times. | 38 # Used for job cycle times. |
| 39 _bucketer = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05, | 39 _bucketer = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05, |
| 40 num_finite_buckets=100) | 40 num_finite_buckets=100) |
| 41 | 41 |
| 42 # Regular (instance-local) metrics: jobs/completed and jobs/durations. | 42 # Regular (instance-local) metrics: jobs/completed and jobs/durations. |
| 43 # Both have the following metric fields: | 43 # Both have the following metric fields: |
| 44 # - project_id: e.g. 'chromium' | 44 # - project_id: e.g. 'chromium' |
| 45 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 45 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 46 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 46 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 47 # for buildbot jobs. | 47 # for buildbot jobs. |
| 48 # - result: one of 'success', 'failure', or 'infra-failure'. | 48 # - result: one of 'success', 'failure', or 'infra-failure'. |
| 49 jobs_completed = gae_ts_mon.CounterMetric( | 49 jobs_completed = gae_ts_mon.CounterMetric( |
| 50 'jobs/completed', | 50 'jobs/completed', |
| 51 description='Number of completed jobs.') | 51 'Number of completed jobs.', [ |
| 52 gae_ts_mon.StringField('spec_name'), |
| 53 gae_ts_mon.StringField('project_id'), |
| 54 gae_ts_mon.StringField('subproject_id'), |
| 55 gae_ts_mon.StringField('result'), |
| 56 ]) |
| 52 | 57 |
| 53 | 58 |
| 54 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( | 59 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( |
| 55 'jobs/durations', bucketer=_bucketer, | 60 'jobs/durations', |
| 56 description='Cycle times of completed jobs, in seconds.') | 61 'Cycle times of completed jobs, in seconds.', [ |
| 62 gae_ts_mon.StringField('spec_name'), |
| 63 gae_ts_mon.StringField('project_id'), |
| 64 gae_ts_mon.StringField('subproject_id'), |
| 65 gae_ts_mon.StringField('result'), |
| 66 ], |
| 67 bucketer=_bucketer) |
| 57 | 68 |
| 58 | 69 |
| 59 # Similar to jobs/completed and jobs/durations, but with a dedup field. | 70 # Similar to jobs/completed and jobs/durations, but with a dedup field. |
| 60 # - project_id: e.g. 'chromium' | 71 # - project_id: e.g. 'chromium' |
| 61 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 72 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 62 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 73 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 63 # for buildbot jobs. | 74 # for buildbot jobs. |
| 64 # - deduped: boolean describing whether the job was deduped or not. | 75 # - deduped: boolean describing whether the job was deduped or not. |
| 65 jobs_requested = gae_ts_mon.CounterMetric( | 76 jobs_requested = gae_ts_mon.CounterMetric( |
| 66 'jobs/requested', | 77 'jobs/requested', |
| 67 description='Number of requested jobs over time.') | 78 'Number of requested jobs over time.', [ |
| 79 gae_ts_mon.StringField('spec_name'), |
| 80 gae_ts_mon.StringField('project_id'), |
| 81 gae_ts_mon.StringField('subproject_id'), |
| 82 gae_ts_mon.BooleanField('deduped'), |
| 83 ]) |
| 68 | 84 |
| 69 | 85 |
| 70 # Swarming-specific metric. Metric fields: | 86 # Swarming-specific metric. Metric fields: |
| 71 # - project_id: e.g. 'chromium' | 87 # - project_id: e.g. 'chromium' |
| 72 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 88 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 73 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 89 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 74 # for buildbot jobs. | 90 # for buildbot jobs. |
| 75 tasks_expired = gae_ts_mon.CounterMetric( | 91 tasks_expired = gae_ts_mon.CounterMetric( |
| 76 'swarming/tasks/expired', | 92 'swarming/tasks/expired', |
| 77 description='Number of expired tasks') | 93 'Number of expired tasks', [ |
| 94 gae_ts_mon.StringField('spec_name'), |
| 95 gae_ts_mon.StringField('project_id'), |
| 96 gae_ts_mon.StringField('subproject_id'), |
| 97 ]) |
| 78 | 98 |
| 79 # Global metric. Metric fields: | 99 # Global metric. Metric fields: |
| 80 # - project_id: e.g. 'chromium' | 100 # - project_id: e.g. 'chromium' |
| 81 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 101 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 82 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 102 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 83 # for buildbot jobs. | 103 # for buildbot jobs. |
| 84 # Override target field: | 104 # Override target field: |
| 85 # - hostname: 'autogen:<executor_id>': name of the bot that executed a job, | 105 # - hostname: 'autogen:<executor_id>': name of the bot that executed a job, |
| 86 # or an empty string. e.g. 'autogen:swarm42-m4'. | 106 # or an empty string. e.g. 'autogen:swarm42-m4'. |
| 87 # Value should be 'pending' or 'running'. Completed / canceled jobs should not | 107 # Value should be 'pending' or 'running'. Completed / canceled jobs should not |
| 88 # send this metric. | 108 # send this metric. |
| 89 jobs_running = gae_ts_mon.BooleanMetric( | 109 jobs_running = gae_ts_mon.BooleanMetric( |
| 90 'jobs/running', | 110 'jobs/running', |
| 91 description='Presence metric for a running job.') | 111 'Presence metric for a running job.', [ |
| 112 gae_ts_mon.StringField('spec_name'), |
| 113 gae_ts_mon.StringField('project_id'), |
| 114 gae_ts_mon.StringField('subproject_id'), |
| 115 ]) |
| 92 | 116 |
| 93 # Global metric. Metric fields: | 117 # Global metric. Metric fields: |
| 94 # - project_id: e.g. 'chromium' | 118 # - project_id: e.g. 'chromium' |
| 95 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 119 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 96 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 120 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 97 # for buildbot jobs. | 121 # for buildbot jobs. |
| 98 # - status: 'pending' or 'running'. | 122 # - status: 'pending' or 'running'. |
| 99 jobs_active = gae_ts_mon.GaugeMetric( | 123 jobs_active = gae_ts_mon.GaugeMetric( |
| 100 'jobs/active', | 124 'jobs/active', |
| 101 description='Number of running, pending or otherwise active jobs.') | 125 'Number of running, pending or otherwise active jobs.', [ |
| 126 gae_ts_mon.StringField('spec_name'), |
| 127 gae_ts_mon.StringField('project_id'), |
| 128 gae_ts_mon.StringField('subproject_id'), |
| 129 gae_ts_mon.StringField('status'), |
| 130 ]) |
| 102 | 131 |
| 103 | 132 |
| 104 # Global metric. Target field: hostname = 'autogen:<executor_id>' (bot id). | 133 # Global metric. Target field: hostname = 'autogen:<executor_id>' (bot id). |
| 105 executors_pool = gae_ts_mon.StringMetric( | 134 executors_pool = gae_ts_mon.StringMetric( |
| 106 'executors/pool', | 135 'executors/pool', |
| 107 description='Pool name for a given job executor.') | 136 'Pool name for a given job executor.', |
| 137 None) |
| 108 | 138 |
| 109 | 139 |
| 110 # Global metric. Target fields: | 140 # Global metric. Target fields: |
| 111 # - hostname = 'autogen:<executor_id>' (bot id). | 141 # - hostname = 'autogen:<executor_id>' (bot id). |
| 112 # Status value must be 'ready', 'running', or anything else, possibly | 142 # Status value must be 'ready', 'running', or anything else, possibly |
| 113 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or | 143 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or |
| 114 # 'dead'. | 144 # 'dead'. |
| 115 executors_status = gae_ts_mon.StringMetric( | 145 executors_status = gae_ts_mon.StringMetric( |
| 116 'executors/status', | 146 'executors/status', |
| 117 description=('Status of a job executor.')) | 147 'Status of a job executor.', |
| 148 None) |
| 118 | 149 |
| 119 | 150 |
| 120 # Global metric. Target fields: | 151 # Global metric. Target fields: |
| 121 # - hostname = 'autogen:<executor_id>' (bot id). | 152 # - hostname = 'autogen:<executor_id>' (bot id). |
| 122 # Status value must be 'ready', 'running', or anything else, possibly | 153 # Status value must be 'ready', 'running', or anything else, possibly |
| 123 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or | 154 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or |
| 124 # 'dead'. | 155 # 'dead'. |
| 125 # Note that 'running' will report data as long as the job is running, | 156 # Note that 'running' will report data as long as the job is running, |
| 126 # so it is best to restrict data to status == 'pending'. | 157 # so it is best to restrict data to status == 'pending'. |
| 127 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric( | 158 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric( |
| 128 'jobs/pending_durations', bucketer=_bucketer, | 159 'jobs/pending_durations', |
| 129 description='Pending times of active jobs, in seconds.') | 160 'Pending times of active jobs, in seconds.', [ |
| 161 gae_ts_mon.StringField('spec_name'), |
| 162 gae_ts_mon.StringField('project_id'), |
| 163 gae_ts_mon.StringField('subproject_id'), |
| 164 gae_ts_mon.StringField('status'), |
| 165 ], |
| 166 bucketer=_bucketer) |
| 130 | 167 |
| 131 | 168 |
| 132 # Global metric. Target fields: | 169 # Global metric. Target fields: |
| 133 # - hostname = 'autogen:<executor_id>' (bot id). | 170 # - hostname = 'autogen:<executor_id>' (bot id). |
| 134 # Status value must be 'ready', 'running', or anything else, possibly | 171 # Status value must be 'ready', 'running', or anything else, possibly |
| 135 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or | 172 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or |
| 136 # 'dead'. | 173 # 'dead'. |
| 137 # Note that 'running' will report data as long as the job is running, | 174 # Note that 'running' will report data as long as the job is running, |
| 138 # so it is best to restrict data to status == 'pending'. | 175 # so it is best to restrict data to status == 'pending'. |
| 139 jobs_max_pending_duration = gae_ts_mon.FloatMetric( | 176 jobs_max_pending_duration = gae_ts_mon.FloatMetric( |
| 140 'jobs/max_pending_duration', | 177 'jobs/max_pending_duration', |
| 141 description='Maximum pending seconds of pending jobs.') | 178 'Maximum pending seconds of pending jobs.', [ |
| 179 gae_ts_mon.StringField('spec_name'), |
| 180 gae_ts_mon.StringField('project_id'), |
| 181 gae_ts_mon.StringField('subproject_id'), |
| 182 gae_ts_mon.StringField('status'), |
| 183 ]) |
| 142 | 184 |
| 143 | 185 |
| 144 # Global metric. Target fields: | 186 # Global metric. Target fields: |
| 145 # - machine_type = server.lease_management.MachineType.key.id(). | 187 # - machine_type = server.lease_management.MachineType.key.id(). |
| 146 # - enabled = server.lease_management.MachineType.enabled. | 188 # - enabled = server.lease_management.MachineType.enabled. |
| 147 machine_types_target_size = gae_ts_mon.GaugeMetric( | 189 machine_types_target_size = gae_ts_mon.GaugeMetric( |
| 148 'swarming/machine_types/target_size', | 190 'swarming/machine_types/target_size', |
| 149 description='Target size of each MachineType leased from Machine Provider.') | 191 'Target size of each MachineType leased from Machine Provider.', [ |
| 192 gae_ts_mon.BooleanField('enabled'), |
| 193 gae_ts_mon.StringField('machine_type'), |
| 194 ]) |
| 150 | 195 |
| 151 | 196 |
| 152 def pool_from_dimensions(dimensions): | 197 def pool_from_dimensions(dimensions): |
| 153 """Return a canonical string of flattened dimensions.""" | 198 """Return a canonical string of flattened dimensions.""" |
| 154 iterables = (map(lambda x: '%s:%s' % (key, x), values) | 199 iterables = (map(lambda x: '%s:%s' % (key, x), values) |
| 155 for key, values in dimensions.iteritems() | 200 for key, values in dimensions.iteritems() |
| 156 if key not in IGNORED_DIMENSIONS) | 201 if key not in IGNORED_DIMENSIONS) |
| 157 return '|'.join(sorted(itertools.chain(*iterables))) | 202 return '|'.join(sorted(itertools.chain(*iterables))) |
| 158 | 203 |
| 159 | 204 |
| (...skipping 215 matching lines...) |
| 375 gae_ts_mon.register_global_metrics([ | 420 gae_ts_mon.register_global_metrics([ |
| 376 executors_pool, | 421 executors_pool, |
| 377 executors_status, | 422 executors_status, |
| 378 jobs_active, | 423 jobs_active, |
| 379 jobs_max_pending_duration, | 424 jobs_max_pending_duration, |
| 380 jobs_pending_durations, | 425 jobs_pending_durations, |
| 381 jobs_running, | 426 jobs_running, |
| 382 machine_types_target_size, | 427 machine_types_target_size, |
| 383 ]) | 428 ]) |
| 384 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) | 429 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) |
| OLD | NEW |