OLD | NEW |
| (Empty) |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import json | |
6 import logging | |
7 import os | |
8 import socket | |
9 import sys | |
10 import urlparse | |
11 import re | |
12 | |
13 import requests | |
14 | |
15 from infra_libs.ts_mon.common import interface | |
16 from infra_libs.ts_mon.common import metric_store | |
17 from infra_libs.ts_mon.common import monitors | |
18 from infra_libs.ts_mon.common import standard_metrics | |
19 from infra_libs.ts_mon.common import targets | |
20 | |
21 | |
def load_machine_config(filename):
  """Read the machine-wide ts_mon configuration from a JSON file.

  Args:
    filename (str): path of the JSON configuration file.

  Returns:
    The decoded JSON content, or an empty dict when no file exists at
    `filename`.

  Raises:
    Exception: whatever opening or decoding the file raised; the failure is
      logged and then re-raised unchanged.
  """
  if os.path.exists(filename):
    try:
      with open(filename) as config_file:
        return json.load(config_file)
    except Exception:
      # Record which file was at fault, then let the caller see the error.
      logging.error('Configuration file couldn\'t be read: %s', filename)
      raise

  # A missing file is not an error: the caller just gets no settings.
  logging.info('Configuration file does not exist, ignoring: %s', filename)
  return {}
33 | |
34 | |
def _default_region(fqdn):
  """Guess the region this machine runs in.

  The GCE metadata server is asked for the instance zone first (with a one
  second timeout). If it answers with an OK status, the last slash-separated
  component of the response text is returned. Otherwise the region is taken
  from the second dot-separated label of `fqdn`.

  Args:
    fqdn (str): fully qualified domain name of this machine.

  Returns:
    The zone reported by the metadata server, else the second label of
    `fqdn`, else '' when `fqdn` has fewer than two labels.
  """
  zone_url = 'http://metadata.google.internal/computeMetadata/v1/instance/zone'
  try:
    response = requests.get(
        zone_url,
        headers={'Metadata-Flavor': 'Google'},
        timeout=1.0)
  except requests.exceptions.RequestException:
    # The metadata server could not be queried; fall back to the host name.
    pass
  else:
    if response.status_code == requests.codes.ok:
      # Only the part after the final slash is the zone.
      return response.text.rsplit('/', 1)[-1]

  labels = fqdn.split('.')
  return labels[1] if len(labels) > 1 else ''  # [chrome|golo]
53 | |
54 | |
def _default_network(host):
  """Extract the network number embedded in a host name.

  Recognized host names are 'masterN', 'masterNa', and names ending in
  '-aN', '-cN' or '-mN' (optionally followed by 'a'), where N is a run of
  digits.

  Args:
    host (str): short host name (no domain part).

  Returns:
    The digits N as a string, or '' when the host name does not follow one of
    the recognized forms.
  """
  found = re.match(r'^([\w-]*?-[acm]|master)(\d+)a?$', host)
  if found is None:
    return ''
  return found.group(2)  # N
62 | |
63 | |
def add_argparse_options(parser):
  """Register the --ts-mon-* monitoring flags on a process' argument parser.

  All flags are placed in a 'Timeseries Monitoring Options' argument group.
  The defaults for the host name, region and network flags are derived from
  socket.getfqdn() through _default_region() and _default_network().

  Args:
    parser (argparse.ArgumentParser): the parser for the main process.
  """
  default_config_file = (
      'C:\\chrome-infra\\ts-mon.json' if sys.platform == 'win32'
      else '/etc/chrome-infra/ts-mon.json')

  group = parser.add_argument_group('Timeseries Monitoring Options')

  def register(specs):
    # Each spec is (flag, keyword arguments for add_argument).
    for flag, options in specs:
      group.add_argument(flag, **options)

  # Flags that do not depend on the identity of this machine.
  register([
      ('--ts-mon-config-file', dict(
          default=default_config_file,
          help='path to a JSON config file that contains suitable values for '
               '"endpoint" and "credentials" for this machine. This config file is '
               'intended to be shared by all processes on the machine, as the '
               'values depend on the machine\'s position in the network, IP '
               'whitelisting and deployment of credentials. (default: %(default)s)')),
      ('--ts-mon-endpoint', dict(
          help='url (including file://, pubsub://project/topic, https://) to post '
               'monitoring metrics to. If set, overrides the value in '
               '--ts-mon-config-file')),
      ('--ts-mon-credentials', dict(
          help='path to a pkcs8 json credential file. If set, overrides the value '
               'in --ts-mon-config-file')),
      ('--ts-mon-flush', dict(
          choices=('manual', 'auto'),
          default='auto',
          help=('metric push behavior: manual (only send when flush() is called), '
                'or auto (send automatically every --ts-mon-flush-interval-secs '
                'seconds). (default: %(default)s)'))),
      ('--ts-mon-flush-interval-secs', dict(
          type=int,
          default=60,
          help=('automatically push metrics on this interval if '
                '--ts-mon-flush=auto.'))),
      ('--ts-mon-autogen-hostname', dict(
          action="store_true",
          help=('Indicate that the hostname is autogenerated. '
                'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
                'or any other hosts with dynamically generated names.'))),
      ('--ts-mon-target-type', dict(
          choices=('device', 'task'),
          default='device',
          help='the type of target that is being monitored ("device" or "task").'
               ' (default: %(default)s)')),
  ])

  # Work out where this process is running to seed the target defaults.
  fqdn = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
  host = fqdn.split('.')[0]  # foo-[a|m]N
  region = _default_region(fqdn)
  network = _default_network(host)

  # Flags describing the device target, the task target and metric naming.
  register([
      ('--ts-mon-device-hostname', dict(
          default=host,
          help='name of this device, (default: %(default)s)')),
      ('--ts-mon-device-region', dict(
          default=region,
          help='name of the region this devices lives in. (default: %(default)s)')),
      ('--ts-mon-device-role', dict(
          default='default',
          help='Role of the device. (default: %(default)s)')),
      ('--ts-mon-device-network', dict(
          default=network,
          help='name of the network this device is connected to. '
               '(default: %(default)s)')),
      ('--ts-mon-task-service-name', dict(
          help='name of the service being monitored')),
      ('--ts-mon-task-job-name', dict(
          help='name of this job instance of the task')),
      ('--ts-mon-task-region', dict(
          default=region,
          help='name of the region in which this task is running '
               '(default: %(default)s)')),
      ('--ts-mon-task-hostname', dict(
          default=host,
          help='name of the host on which this task is running '
               '(default: %(default)s)')),
      ('--ts-mon-task-number', dict(
          type=int,
          default=0,
          help='number (e.g. for replication) of this instance of this task '
               '(default: %(default)s)')),
      ('--ts-mon-metric-name-prefix', dict(
          default='/chrome/infra/',
          help='metric name prefix for all metrics (default: %(default)s)')),
  ])
167 | |
def process_argparse_options(args):
  """Set up the global ts_mon state from parsed command line arguments.

  Reads the machine config file, lets command line flags override its
  "endpoint" and "credentials" values, installs the default target (device or
  task) and the global monitor matching the endpoint scheme, and finally
  initializes the standard metrics.

  When --ts-mon-flush is 'auto', a background flush thread is started with the
  configured interval.

  If the target type is "task" and the service name or job name is missing, a
  message is written to stderr and the process exits with status 2.

  Args:
    args (argparse.Namespace): the result of parsing the command line arguments
  """
  # Machine-wide settings; an absent config file yields an empty dict.
  machine_config = load_machine_config(args.ts_mon_config_file)
  endpoint = machine_config.get('endpoint', '')
  credentials = machine_config.get('credentials', '')
  autogen_hostname = machine_config.get('autogen_hostname', False)

  # Values given explicitly on the command line win over the config file.
  if args.ts_mon_endpoint is not None:
    endpoint = args.ts_mon_endpoint
  if args.ts_mon_credentials is not None:
    credentials = args.ts_mon_credentials

  def tagged(name):
    # Mark host names that were flagged as autogenerated (flag or config).
    if args.ts_mon_autogen_hostname or autogen_hostname:
      return 'autogen:' + name
    return name

  target_type = args.ts_mon_target_type
  if target_type == 'device':
    device_host = tagged(args.ts_mon_device_hostname)
    interface.state.target = targets.DeviceTarget(
        args.ts_mon_device_region,
        args.ts_mon_device_role,
        args.ts_mon_device_network,
        device_host)
  elif target_type == 'task':
    # Mimic ArgumentParser.error, since the parser is not available here.
    required = (
        ('ts_mon_task_service_name',
         'Argument --ts-mon-task-service-name must be '
         'provided when the target type is "task".'),
        ('ts_mon_task_job_name',
         'Argument --ts-mon-task-job-name must be provided '
         'when the target type is "task".'),
    )
    for attribute, complaint in required:
      if not getattr(args, attribute):
        sys.stderr.write(complaint + '\n')
        sys.exit(2)
    task_host = tagged(args.ts_mon_task_hostname)
    interface.state.target = targets.TaskTarget(
        args.ts_mon_task_service_name,
        args.ts_mon_task_job_name,
        args.ts_mon_task_region,
        task_host,
        args.ts_mon_task_number)

  interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix
  # Start from a no-op monitor; it stays in place unless the endpoint below
  # selects a real one.
  interface.state.global_monitor = monitors.NullMonitor()

  file_scheme = 'file://'
  if endpoint.startswith(file_scheme):
    interface.state.global_monitor = monitors.DebugMonitor(
        endpoint[len(file_scheme):])
  elif endpoint.startswith('pubsub://'):
    if not credentials:
      logging.error('ts_mon monitoring is disabled because credentials are not '
                    'available')
    else:
      # pubsub://project/topic: the project is the netloc, the topic the path.
      parsed = urlparse.urlparse(endpoint)
      project = parsed.netloc
      topic = parsed.path.strip('/')
      interface.state.global_monitor = monitors.PubSubMonitor(
          credentials, project, topic, use_instrumented_http=True)
  elif endpoint.startswith('https://'):
    interface.state.global_monitor = monitors.HttpsMonitor(endpoint,
                                                           credentials)
  elif endpoint.lower() == 'none':
    logging.info('ts_mon monitoring has been explicitly disabled')
  else:
    logging.error('ts_mon monitoring is disabled because the endpoint provided'
                  ' is invalid or not supported: %s', endpoint)

  flush_mode = args.ts_mon_flush
  interface.state.flush_mode = flush_mode

  if flush_mode == 'auto':
    interface.state.flush_thread = interface._FlushThread(
        args.ts_mon_flush_interval_secs)
    interface.state.flush_thread.start()

  standard_metrics.init()
OLD | NEW |