| OLD | NEW |
| 1 # Copyright 2016 The LUCI Authors. All rights reserved. | 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
| 4 | 4 |
| 5 """Timeseries metrics.""" | 5 """Timeseries metrics.""" |
| 6 | 6 |
| 7 from collections import defaultdict | 7 from collections import defaultdict |
| 8 import datetime | 8 import datetime |
| 9 import itertools | 9 import itertools |
| 10 import json | 10 import json |
| (...skipping 19 matching lines...) |
| 30 # Override default target fields for app-global metrics. | 30 # Override default target fields for app-global metrics. |
| 31 TARGET_FIELDS = { | 31 TARGET_FIELDS = { |
| 32 'job_name': '', # module name | 32 'job_name': '', # module name |
| 33 'hostname': '', # version | 33 'hostname': '', # version |
| 34 'task_num': 0, # instance ID | 34 'task_num': 0, # instance ID |
| 35 } | 35 } |
| 36 | 36 |
| 37 # A custom bucketer with 12% resolution in the range of 1..10**5. | 37 # A custom bucketer with 12% resolution in the range of 1..10**5. |
| 38 # Used for job cycle times. | 38 # Used for job cycle times. |
| 39 _bucketer = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05, | 39 _bucketer = gae_ts_mon.GeometricBucketer(growth_factor=10**0.05, |
| 40 num_finite_buckets=100) | 40 num_finite_buckets=100) |
| 41 | 41 |
| 42 # Regular (instance-local) metrics: jobs/completed and jobs/durations. | 42 # Regular (instance-local) metrics: jobs/completed and jobs/durations. |
| 43 # Both have the following metric fields: | 43 # Both have the following metric fields: |
| 44 # - project_id: e.g. 'chromium' | 44 # - project_id: e.g. 'chromium' |
| 45 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 45 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 46 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 46 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 47 # for buildbot jobs. | 47 # for buildbot jobs. |
| 48 # - result: one of 'success', 'failure', or 'infra-failure'. | 48 # - result: one of 'success', 'failure', or 'infra-failure'. |
| 49 jobs_completed = gae_ts_mon.CounterMetric( | 49 jobs_completed = gae_ts_mon.CounterMetric( |
| 50 'jobs/completed', | 50 'jobs/completed', |
| 51 'Number of completed jobs.', [ | 51 description='Number of completed jobs.') |
| 52 gae_ts_mon.StringField('spec_name'), | |
| 53 gae_ts_mon.StringField('project_id'), | |
| 54 gae_ts_mon.StringField('subproject_id'), | |
| 55 gae_ts_mon.StringField('result'), | |
| 56 ]) | |
| 57 | 52 |
| 58 | 53 |
| 59 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( | 54 jobs_durations = gae_ts_mon.CumulativeDistributionMetric( |
| 60 'jobs/durations', | 55 'jobs/durations', bucketer=_bucketer, |
| 61 'Cycle times of completed jobs, in seconds.', [ | 56 description='Cycle times of completed jobs, in seconds.') |
| 62 gae_ts_mon.StringField('spec_name'), | |
| 63 gae_ts_mon.StringField('project_id'), | |
| 64 gae_ts_mon.StringField('subproject_id'), | |
| 65 gae_ts_mon.StringField('result'), | |
| 66 ], | |
| 67 bucketer=_bucketer) | |
| 68 | 57 |
| 69 | 58 |
| 70 # Similar to jobs/completed and jobs/durations, but with a dedup field. | 59 # Similar to jobs/completed and jobs/durations, but with a dedup field. |
| 71 # - project_id: e.g. 'chromium' | 60 # - project_id: e.g. 'chromium' |
| 72 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 61 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 73 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 62 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 74 # for buildbot jobs. | 63 # for buildbot jobs. |
| 75 # - deduped: boolean describing whether the job was deduped or not. | 64 # - deduped: boolean describing whether the job was deduped or not. |
| 76 jobs_requested = gae_ts_mon.CounterMetric( | 65 jobs_requested = gae_ts_mon.CounterMetric( |
| 77 'jobs/requested', | 66 'jobs/requested', |
| 78 'Number of requested jobs over time.', [ | 67 description='Number of requested jobs over time.') |
| 79 gae_ts_mon.StringField('spec_name'), | |
| 80 gae_ts_mon.StringField('project_id'), | |
| 81 gae_ts_mon.StringField('subproject_id'), | |
| 82 gae_ts_mon.BooleanField('deduped'), | |
| 83 ]) | |
| 84 | 68 |
| 85 | 69 |
| 86 # Swarming-specific metric. Metric fields: | 70 # Swarming-specific metric. Metric fields: |
| 87 # - project_id: e.g. 'chromium' | 71 # - project_id: e.g. 'chromium' |
| 88 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 72 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 89 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 73 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 90 # for buildbot jobs. | 74 # for buildbot jobs. |
| 91 tasks_expired = gae_ts_mon.CounterMetric( | 75 tasks_expired = gae_ts_mon.CounterMetric( |
| 92 'swarming/tasks/expired', | 76 'swarming/tasks/expired', |
| 93 'Number of expired tasks', [ | 77 description='Number of expired tasks') |
| 94 gae_ts_mon.StringField('spec_name'), | |
| 95 gae_ts_mon.StringField('project_id'), | |
| 96 gae_ts_mon.StringField('subproject_id'), | |
| 97 ]) | |
| 98 | 78 |
| 99 # Global metric. Metric fields: | 79 # Global metric. Metric fields: |
| 100 # - project_id: e.g. 'chromium' | 80 # - project_id: e.g. 'chromium' |
| 101 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 81 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 102 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 82 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 103 # for buildbot jobs. | 83 # for buildbot jobs. |
| 104 # Override target field: | 84 # Override target field: |
| 105 # - hostname: 'autogen:<executor_id>': name of the bot that executed a job, | 85 # - hostname: 'autogen:<executor_id>': name of the bot that executed a job, |
| 106 # or an empty string. e.g. 'autogen:swarm42-m4'. | 86 # or an empty string. e.g. 'autogen:swarm42-m4'. |
| 107 # Value should be 'pending' or 'running'. Completed / canceled jobs should not | 87 # Value should be 'pending' or 'running'. Completed / canceled jobs should not |
| 108 # send this metric. | 88 # send this metric. |
| 109 jobs_running = gae_ts_mon.BooleanMetric( | 89 jobs_running = gae_ts_mon.BooleanMetric( |
| 110 'jobs/running', | 90 'jobs/running', |
| 111 'Presence metric for a running job.', [ | 91 description='Presence metric for a running job.') |
| 112 gae_ts_mon.StringField('spec_name'), | |
| 113 gae_ts_mon.StringField('project_id'), | |
| 114 gae_ts_mon.StringField('subproject_id'), | |
| 115 ]) | |
| 116 | 92 |
| 117 # Global metric. Metric fields: | 93 # Global metric. Metric fields: |
| 118 # - project_id: e.g. 'chromium' | 94 # - project_id: e.g. 'chromium' |
| 119 # - subproject_id: e.g. 'blink'. Set to empty string if not used. | 95 # - subproject_id: e.g. 'blink'. Set to empty string if not used. |
| 120 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' | 96 # - spec_name: name of a job specification, e.g. '<master>:<builder>:<test>' |
| 121 # for buildbot jobs. | 97 # for buildbot jobs. |
| 122 # - status: 'pending' or 'running'. | 98 # - status: 'pending' or 'running'. |
| 123 jobs_active = gae_ts_mon.GaugeMetric( | 99 jobs_active = gae_ts_mon.GaugeMetric( |
| 124 'jobs/active', | 100 'jobs/active', |
| 125 'Number of running, pending or otherwise active jobs.', [ | 101 description='Number of running, pending or otherwise active jobs.') |
| 126 gae_ts_mon.StringField('spec_name'), | |
| 127 gae_ts_mon.StringField('project_id'), | |
| 128 gae_ts_mon.StringField('subproject_id'), | |
| 129 gae_ts_mon.StringField('status'), | |
| 130 ]) | |
| 131 | 102 |
| 132 | 103 |
| 133 # Global metric. Target field: hostname = 'autogen:<executor_id>' (bot id). | 104 # Global metric. Target field: hostname = 'autogen:<executor_id>' (bot id). |
| 134 executors_pool = gae_ts_mon.StringMetric( | 105 executors_pool = gae_ts_mon.StringMetric( |
| 135 'executors/pool', | 106 'executors/pool', |
| 136 'Pool name for a given job executor.', | 107 description='Pool name for a given job executor.') |
| 137 None) | |
| 138 | 108 |
| 139 | 109 |
| 140 # Global metric. Target fields: | 110 # Global metric. Target fields: |
| 141 # - hostname = 'autogen:<executor_id>' (bot id). | 111 # - hostname = 'autogen:<executor_id>' (bot id). |
| 142 # Status value must be 'ready', 'running', or anything else, possibly | 112 # Status value must be 'ready', 'running', or anything else, possibly |
| 143 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or | 113 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or |
| 144 # 'dead'. | 114 # 'dead'. |
| 145 executors_status = gae_ts_mon.StringMetric( | 115 executors_status = gae_ts_mon.StringMetric( |
| 146 'executors/status', | 116 'executors/status', |
| 147 'Status of a job executor.', | 117 description=('Status of a job executor.')) |
| 148 None) | |
| 149 | 118 |
| 150 | 119 |
| 151 # Global metric. Target fields: | 120 # Global metric. Target fields: |
| 152 # - hostname = 'autogen:<executor_id>' (bot id). | 121 # - hostname = 'autogen:<executor_id>' (bot id). |
| 153 # Status value must be 'ready', 'running', or anything else, possibly | 122 # Status value must be 'ready', 'running', or anything else, possibly |
| 154 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or | 123 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or |
| 155 # 'dead'. | 124 # 'dead'. |
| 156 # Note that 'running' will report data as long as the job is running, | 125 # Note that 'running' will report data as long as the job is running, |
| 157 # so it is best to restrict data to status == 'pending'. | 126 # so it is best to restrict data to status == 'pending'. |
| 158 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric( | 127 jobs_pending_durations = gae_ts_mon.NonCumulativeDistributionMetric( |
| 159 'jobs/pending_durations', | 128 'jobs/pending_durations', bucketer=_bucketer, |
| 160 'Pending times of active jobs, in seconds.', [ | 129 description='Pending times of active jobs, in seconds.') |
| 161 gae_ts_mon.StringField('spec_name'), | |
| 162 gae_ts_mon.StringField('project_id'), | |
| 163 gae_ts_mon.StringField('subproject_id'), | |
| 164 gae_ts_mon.StringField('status'), | |
| 165 ], | |
| 166 bucketer=_bucketer) | |
| 167 | 130 |
| 168 | 131 |
| 169 # Global metric. Target fields: | 132 # Global metric. Target fields: |
| 170 # - hostname = 'autogen:<executor_id>' (bot id). | 133 # - hostname = 'autogen:<executor_id>' (bot id). |
| 171 # Status value must be 'ready', 'running', or anything else, possibly | 134 # Status value must be 'ready', 'running', or anything else, possibly |
| 172 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or | 135 # swarming-specific, when it cannot run a job. E.g. 'quarantined' or |
| 173 # 'dead'. | 136 # 'dead'. |
| 174 # Note that 'running' will report data as long as the job is running, | 137 # Note that 'running' will report data as long as the job is running, |
| 175 # so it is best to restrict data to status == 'pending'. | 138 # so it is best to restrict data to status == 'pending'. |
| 176 jobs_max_pending_duration = gae_ts_mon.FloatMetric( | 139 jobs_max_pending_duration = gae_ts_mon.FloatMetric( |
| 177 'jobs/max_pending_duration', | 140 'jobs/max_pending_duration', |
| 178 'Maximum pending seconds of pending jobs.', [ | 141 description='Maximum pending seconds of pending jobs.') |
| 179 gae_ts_mon.StringField('spec_name'), | |
| 180 gae_ts_mon.StringField('project_id'), | |
| 181 gae_ts_mon.StringField('subproject_id'), | |
| 182 gae_ts_mon.StringField('status'), | |
| 183 ]) | |
| 184 | 142 |
| 185 | 143 |
| 186 def pool_from_dimensions(dimensions): | 144 def pool_from_dimensions(dimensions): |
| 187 """Return a canonical string of flattened dimensions.""" | 145 """Return a canonical string of flattened dimensions.""" |
| 188 iterables = (map(lambda x: '%s:%s' % (key, x), values) | 146 iterables = (map(lambda x: '%s:%s' % (key, x), values) |
| 189 for key, values in dimensions.iteritems() | 147 for key, values in dimensions.iteritems() |
| 190 if key not in IGNORED_DIMENSIONS) | 148 if key not in IGNORED_DIMENSIONS) |
| 191 return '|'.join(sorted(itertools.chain(*iterables))) | 149 return '|'.join(sorted(itertools.chain(*iterables))) |
| 192 | 150 |
| 193 | 151 |
| (...skipping 212 matching lines...) |
| 406 def initialize(): | 364 def initialize(): |
| 407 gae_ts_mon.register_global_metrics([ | 365 gae_ts_mon.register_global_metrics([ |
| 408 executors_pool, | 366 executors_pool, |
| 409 executors_status, | 367 executors_status, |
| 410 jobs_active, | 368 jobs_active, |
| 411 jobs_max_pending_duration, | 369 jobs_max_pending_duration, |
| 412 jobs_pending_durations, | 370 jobs_pending_durations, |
| 413 jobs_running, | 371 jobs_running, |
| 414 ]) | 372 ]) |
| 415 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) | 373 gae_ts_mon.register_global_metrics_callback('callback', _set_global_metrics) |
| OLD | NEW |