| OLD | NEW | 
|---|
|  | (Empty) | 
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. |  | 
| 2 # Use of this source code is governed by a BSD-style license that can be |  | 
| 3 # found in the LICENSE file. |  | 
| 4 |  | 
| 5 import json |  | 
| 6 import logging |  | 
| 7 import os |  | 
| 8 import socket |  | 
| 9 import sys |  | 
| 10 import urlparse |  | 
| 11 import re |  | 
| 12 |  | 
| 13 import requests |  | 
| 14 |  | 
| 15 from infra_libs.ts_mon.common import interface |  | 
| 16 from infra_libs.ts_mon.common import metric_store |  | 
| 17 from infra_libs.ts_mon.common import monitors |  | 
| 18 from infra_libs.ts_mon.common import standard_metrics |  | 
| 19 from infra_libs.ts_mon.common import targets |  | 
| 20 |  | 
| 21 |  | 
def load_machine_config(filename):
  """Read the machine-wide monitoring configuration from a JSON file.

  Args:
    filename (str): path of the JSON configuration file.

  Returns:
    The decoded JSON document, or an empty dict when no file exists at
    `filename`.

  Raises:
    Exception: whatever opening or decoding an existing file raised; the
      failure is logged and the exception is re-raised unchanged.
  """
  if os.path.exists(filename):
    try:
      with open(filename) as config_file:
        parsed = json.load(config_file)
    except Exception:
      # Record which file was at fault, then let the caller see the error.
      logging.error('Configuration file couldn\'t be read: %s', filename)
      raise
    return parsed

  # A missing file is tolerated: the caller gets an empty configuration.
  logging.info('Configuration file does not exist, ignoring: %s', filename)
  return {}
| 33 |  | 
| 34 |  | 
def _default_region(fqdn):
  """Guess the region this machine is in.

  The GCE metadata server is queried first; if it answers successfully the
  instance's zone is used. Otherwise the region is taken from the second
  dot-separated label of the fully-qualified domain name.

  Args:
    fqdn (str): fully-qualified domain name of this machine.

  Returns:
    The zone name reported by the metadata server, else the second label of
    `fqdn`, else '' when `fqdn` has fewer than two labels.
  """
  response = None
  try:
    # Short timeout: outside GCE this request is expected to fail.
    response = requests.get(
        'http://metadata.google.internal/computeMetadata/v1/instance/zone',
        headers={'Metadata-Flavor': 'Google'},
        timeout=1.0)
  except requests.exceptions.RequestException:
    # Could not reach the metadata server; fall back to the hostname.
    pass

  if response is not None and response.status_code == requests.codes.ok:
    # The zone is whatever follows the last slash of the response body.
    return response.text.rsplit('/', 1)[-1]

  labels = fqdn.split('.')  # host, [chrome|golo], ...
  if len(labels) > 1:
    return labels[1]
  return ''
| 53 |  | 
| 54 |  | 
| 55 def _default_network(host): |  | 
| 56   try: |  | 
| 57     # Regular expression that matches the vast majority of our host names. |  | 
| 58     # Matches everything of the form 'masterN', 'masterNa', and 'foo-xN'. |  | 
| 59     return re.match(r'^([\w-]*?-[acm]|master)(\d+)a?$', host).group(2)  # N |  | 
| 60   except AttributeError: |  | 
| 61     return '' |  | 
| 62 |  | 
| 63 |  | 
def add_argparse_options(parser):
  """Register the monitoring flags on a process' argument parser.

  Every flag is added to a 'Timeseries Monitoring Options' argument group
  created on `parser`. The defaults of the hostname, region and network flags
  are computed from this machine's fully-qualified domain name through
  _default_region() and _default_network().

  Args:
    parser (argparse.ArgumentParser): the parser for the main process.
  """
  # The machine-wide config file lives at a fixed, platform-specific path.
  default_config_file = (
      'C:\\chrome-infra\\ts-mon.json' if sys.platform == 'win32'
      else '/etc/chrome-infra/ts-mon.json')

  group = parser.add_argument_group('Timeseries Monitoring Options')

  # Where and how metrics are sent.
  group.add_argument(
      '--ts-mon-config-file',
      help='path to a JSON config file that contains suitable values for '
           '"endpoint" and "credentials" for this machine. This config file is '
           'intended to be shared by all processes on the machine, as the '
           'values depend on the machine\'s position in the network, IP '
           'whitelisting and deployment of credentials. (default: %(default)s)',
      default=default_config_file)
  group.add_argument(
      '--ts-mon-endpoint',
      help='url (including file://, pubsub://project/topic, https://) to post '
           'monitoring metrics to. If set, overrides the value in '
           '--ts-mon-config-file')
  group.add_argument(
      '--ts-mon-credentials',
      help='path to a pkcs8 json credential file. If set, overrides the value '
           'in --ts-mon-config-file')
  group.add_argument(
      '--ts-mon-flush',
      default='auto',
      choices=('manual', 'auto'),
      help='metric push behavior: manual (only send when flush() is called), '
           'or auto (send automatically every --ts-mon-flush-interval-secs '
           'seconds). (default: %(default)s)')
  group.add_argument(
      '--ts-mon-flush-interval-secs',
      default=60,
      type=int,
      help='automatically push metrics on this interval if '
           '--ts-mon-flush=auto.')
  group.add_argument(
      '--ts-mon-autogen-hostname',
      action='store_true',
      help='Indicate that the hostname is autogenerated. '
           'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
           'or any other hosts with dynamically generated names.')

  # What kind of target the metrics describe.
  group.add_argument(
      '--ts-mon-target-type',
      default='device',
      choices=('device', 'task'),
      help='the type of target that is being monitored ("device" or "task").'
           ' (default: %(default)s)')

  # Derive sensible per-machine defaults from the local host name.
  full_name = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
  short_name = full_name.split('.')[0]  # foo-[a|m]N
  region = _default_region(full_name)
  network = _default_network(short_name)

  # Fields of a "device" target.
  group.add_argument(
      '--ts-mon-device-hostname',
      help='name of this device, (default: %(default)s)',
      default=short_name)
  group.add_argument(
      '--ts-mon-device-region',
      help='name of the region this devices lives in. (default: %(default)s)',
      default=region)
  group.add_argument(
      '--ts-mon-device-role',
      help='Role of the device. (default: %(default)s)',
      default='default')
  group.add_argument(
      '--ts-mon-device-network',
      help='name of the network this device is connected to. '
           '(default: %(default)s)',
      default=network)

  # Fields of a "task" target.
  group.add_argument(
      '--ts-mon-task-service-name',
      help='name of the service being monitored')
  group.add_argument(
      '--ts-mon-task-job-name',
      help='name of this job instance of the task')
  group.add_argument(
      '--ts-mon-task-region',
      help='name of the region in which this task is running '
           '(default: %(default)s)',
      default=region)
  group.add_argument(
      '--ts-mon-task-hostname',
      help='name of the host on which this task is running '
           '(default: %(default)s)',
      default=short_name)
  group.add_argument(
      '--ts-mon-task-number',
      help='number (e.g. for replication) of this instance of this task '
           '(default: %(default)s)',
      default=0,
      type=int)

  group.add_argument(
      '--ts-mon-metric-name-prefix',
      help='metric name prefix for all metrics (default: %(default)s)',
      default='/chrome/infra/')
| 167 |  | 
def process_argparse_options(args):
  """Set up the global monitoring state from parsed command line arguments.

  Assigns the default target, the metric name prefix, the global monitor and
  the flush mode on interface.state, starts the background flush thread when
  --ts-mon-flush is 'auto', and finally calls standard_metrics.init().

  When the target type is 'task' and either the service name or the job name
  is missing, a message is printed to stderr and the process exits with
  status 2.

  Args:
    args (argparse.Namespace): the result of parsing the command line arguments
  """
  # Values from the machine config file (an empty dict if there is no file).
  machine_config = load_machine_config(args.ts_mon_config_file)
  endpoint = machine_config.get('endpoint', '')
  credentials = machine_config.get('credentials', '')
  autogen_hostname = machine_config.get('autogen_hostname', False)

  # A flag given on the command line wins over the config file.
  if args.ts_mon_endpoint is not None:
    endpoint = args.ts_mon_endpoint
  if args.ts_mon_credentials is not None:
    credentials = args.ts_mon_credentials

  def _tag_hostname(name):
    # Autogenerated host names are marked with an 'autogen:' prefix, whether
    # requested by flag or by the config file.
    if args.ts_mon_autogen_hostname or autogen_hostname:
      return 'autogen:' + name
    return name

  target_type = args.ts_mon_target_type
  if target_type == 'device':
    interface.state.target = targets.DeviceTarget(
        args.ts_mon_device_region,
        args.ts_mon_device_role,
        args.ts_mon_device_network,
        _tag_hostname(args.ts_mon_device_hostname))
  elif target_type == 'task':
    # Reimplement ArgumentParser.error, since we don't have access to the parser
    if not args.ts_mon_task_service_name:
      print >> sys.stderr, ('Argument --ts-mon-task-service-name must be '
                            'provided when the target type is "task".')
      sys.exit(2)
    if not args.ts_mon_task_job_name:
      print >> sys.stderr, ('Argument --ts-mon-task-job-name must be provided '
                            'when the target type is "task".')
      sys.exit(2)
    interface.state.target = targets.TaskTarget(
        args.ts_mon_task_service_name,
        args.ts_mon_task_job_name,
        args.ts_mon_task_region,
        _tag_hostname(args.ts_mon_task_hostname),
        args.ts_mon_task_number)

  interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix

  # Start from a NullMonitor; it stays in place unless the endpoint selects a
  # real monitor below.
  interface.state.global_monitor = monitors.NullMonitor()

  if endpoint.startswith('file://'):
    debug_path = endpoint[len('file://'):]
    interface.state.global_monitor = monitors.DebugMonitor(debug_path)
  elif endpoint.startswith('pubsub://'):
    if not credentials:
      logging.error('ts_mon monitoring is disabled because credentials are not '
                    'available')
    else:
      # pubsub://project/topic
      parsed = urlparse.urlparse(endpoint)
      interface.state.global_monitor = monitors.PubSubMonitor(
          credentials, parsed.netloc, parsed.path.strip('/'),
          use_instrumented_http=True)
  elif endpoint.startswith('https://'):
    interface.state.global_monitor = monitors.HttpsMonitor(
        endpoint, credentials)
  elif endpoint.lower() == 'none':
    logging.info('ts_mon monitoring has been explicitly disabled')
  else:
    logging.error('ts_mon monitoring is disabled because the endpoint provided'
                  ' is invalid or not supported: %s', endpoint)

  flush_mode = args.ts_mon_flush
  interface.state.flush_mode = flush_mode

  if flush_mode == 'auto':
    interface.state.flush_thread = interface._FlushThread(
        args.ts_mon_flush_interval_secs)
    interface.state.flush_thread.start()

  standard_metrics.init()
| OLD | NEW | 
|---|