infra_libs/ts_mon/config.py - Issue 2213143002: Add infra_libs as a bootstrap dependency.

Side by Side Diff: infra_libs/ts_mon/config.py

Issue 2213143002: Add infra_libs as a bootstrap dependency. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 import json

6 import logging

7 import os

8 import socket

9 import sys

10 import urlparse

11 import re

12

13 import requests

14

15 from infra_libs.ts_mon.common import interface

16 from infra_libs.ts_mon.common import metric_store

17 from infra_libs.ts_mon.common import monitors

18 from infra_libs.ts_mon.common import standard_metrics

19 from infra_libs.ts_mon.common import targets

20

21

def load_machine_config(filename):
  """Load the machine-wide ts_mon configuration from a JSON file.

  Args:
    filename (str): path to the JSON configuration file.

  Returns:
    The decoded JSON content of the file, or an empty dict if the file does
    not exist.

  Raises:
    Exception: whatever open() or json.load() raised. The failure is logged
        and the original exception is re-raised unchanged.
  """
  # A missing file is not an error: callers fall back to their defaults.
  if not os.path.exists(filename):
    logging.info('Configuration file does not exist, ignoring: %s', filename)
    return {}

  try:
    with open(filename) as fh:
      return json.load(fh)
  except Exception:
    # The file exists but is unreadable or malformed; record which file it
    # was and let the caller see the original exception.
    logging.error('Configuration file couldn\'t be read: %s', filename)
    raise

33

34

def _default_region(fqdn):
  """Guess the region this machine is running in.

  First asks the GCE metadata server for the instance zone. If that request
  fails, or does not return an OK status, falls back to the second
  dot-separated component of the fully qualified domain name.

  Args:
    fqdn (str): fully qualified domain name of this machine.

  Returns:
    The zone reported by the metadata server, otherwise the second component
    of fqdn, otherwise '' if fqdn has no second component.
  """
  # Check if we're running in a GCE instance.
  try:
    r = requests.get(
        'http://metadata.google.internal/computeMetadata/v1/instance/zone',
        headers={'Metadata-Flavor': 'Google'},
        timeout=1.0)
  except requests.exceptions.RequestException:
    # Best effort: a failed request just means we use the fqdn fallback below.
    pass
  else:
    if r.status_code == requests.codes.ok:
      # The zone is the last slash-separated component.
      return r.text.split('/')[-1]

  try:
    return fqdn.split('.')[1]  # [chrome|golo]
  except IndexError:
    return ''

53

54

55 def _default_network(host):

56 try:

57 # Regular expression that matches the vast majority of our host names.

58 # Matches everything of the form 'masterN', 'masterNa', and 'foo-xN'.

59 return re.match(r'^([\w-]*?-[acm]\|master)(\d+)a?$', host).group(2) # N

60 except AttributeError:

61 return ''

62

63

def add_argparse_options(parser):
  """Add monitoring related flags to a process' argument parser.

  All flags are added to a 'Timeseries Monitoring Options' argument group.
  The defaults for the hostname, region and network flags are computed when
  this function is called, from socket.getfqdn(), _default_region() and
  _default_network().

  Args:
    parser (argparse.ArgumentParser): the parser for the main process.
  """
  if sys.platform == 'win32':  # pragma: no cover
    default_config_file = 'C:\\chrome-infra\\ts-mon.json'
  else:  # pragma: no cover
    default_config_file = '/etc/chrome-infra/ts-mon.json'

  # Keep the caller's parser untouched and register every flag on the group.
  group = parser.add_argument_group('Timeseries Monitoring Options')
  group.add_argument(
      '--ts-mon-config-file',
      default=default_config_file,
      help='path to a JSON config file that contains suitable values for '
           '"endpoint" and "credentials" for this machine. This config file is '
           'intended to be shared by all processes on the machine, as the '
           'values depend on the machine\'s position in the network, IP '
           'whitelisting and deployment of credentials. (default: %(default)s)')
  group.add_argument(
      '--ts-mon-endpoint',
      help='url (including file://, pubsub://project/topic, https://) to post '
           'monitoring metrics to. If set, overrides the value in '
           '--ts-mon-config-file')
  group.add_argument(
      '--ts-mon-credentials',
      help='path to a pkcs8 json credential file. If set, overrides the value '
           'in --ts-mon-config-file')
  group.add_argument(
      '--ts-mon-flush',
      choices=('manual', 'auto'), default='auto',
      help=('metric push behavior: manual (only send when flush() is called), '
            'or auto (send automatically every --ts-mon-flush-interval-secs '
            'seconds). (default: %(default)s)'))
  group.add_argument(
      '--ts-mon-flush-interval-secs',
      type=int,
      default=60,
      help=('automatically push metrics on this interval if '
            '--ts-mon-flush=auto.'))
  group.add_argument(
      '--ts-mon-autogen-hostname',
      action="store_true",
      help=('Indicate that the hostname is autogenerated. '
            'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
            'or any other hosts with dynamically generated names.'))

  group.add_argument(
      '--ts-mon-target-type',
      choices=('device', 'task'),
      default='device',
      help='the type of target that is being monitored ("device" or "task").'
           ' (default: %(default)s)')

  # Defaults derived from this machine's name.
  fqdn = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
  host = fqdn.split('.')[0]  # foo-[a|m]N
  region = _default_region(fqdn)
  network = _default_network(host)

  # Flags describing a 'device' target.
  group.add_argument(
      '--ts-mon-device-hostname',
      default=host,
      help='name of this device, (default: %(default)s)')
  group.add_argument(
      '--ts-mon-device-region',
      default=region,
      help='name of the region this devices lives in. (default: %(default)s)')
  group.add_argument(
      '--ts-mon-device-role',
      default='default',
      help='Role of the device. (default: %(default)s)')
  group.add_argument(
      '--ts-mon-device-network',
      default=network,
      help='name of the network this device is connected to. '
           '(default: %(default)s)')

  # Flags describing a 'task' target.
  group.add_argument(
      '--ts-mon-task-service-name',
      help='name of the service being monitored')
  group.add_argument(
      '--ts-mon-task-job-name',
      help='name of this job instance of the task')
  group.add_argument(
      '--ts-mon-task-region',
      default=region,
      help='name of the region in which this task is running '
           '(default: %(default)s)')
  group.add_argument(
      '--ts-mon-task-hostname',
      default=host,
      help='name of the host on which this task is running '
           '(default: %(default)s)')
  group.add_argument(
      '--ts-mon-task-number', type=int, default=0,
      help='number (e.g. for replication) of this instance of this task '
           '(default: %(default)s)')

  group.add_argument(
      '--ts-mon-metric-name-prefix',
      default='/chrome/infra/',
      help='metric name prefix for all metrics (default: %(default)s)')

167

def process_argparse_options(args):
  """Process command line arguments to initialize the global monitor.

  Also initializes the default target.

  Starts a background thread to automatically flush monitoring metrics if not
  disabled by command line arguments.

  Exits the process with status 2 if the target type is "task" and the
  service name or job name is missing.

  Args:
    args (argparse.Namespace): the result of parsing the command line arguments
  """
  # Parse the config file if it exists.
  config = load_machine_config(args.ts_mon_config_file)
  endpoint = config.get('endpoint', '')
  credentials = config.get('credentials', '')
  autogen_hostname = config.get('autogen_hostname', False)

  # Command-line args override the values in the config file.
  if args.ts_mon_endpoint is not None:
    endpoint = args.ts_mon_endpoint
  if args.ts_mon_credentials is not None:
    credentials = args.ts_mon_credentials

  # The hostname is marked as autogenerated if either the flag or the config
  # file says so; the same rule applies to both target types.
  use_autogen_prefix = args.ts_mon_autogen_hostname or autogen_hostname

  if args.ts_mon_target_type == 'device':
    hostname = args.ts_mon_device_hostname
    if use_autogen_prefix:
      hostname = 'autogen:' + hostname
    interface.state.target = targets.DeviceTarget(
        args.ts_mon_device_region,
        args.ts_mon_device_role,
        args.ts_mon_device_network,
        hostname)
  if args.ts_mon_target_type == 'task':
    # Reimplement ArgumentParser.error, since we don't have access to the parser
    # sys.stderr.write is used instead of the print statement so that the
    # message is emitted identically on Python 2 and Python 3.
    if not args.ts_mon_task_service_name:
      sys.stderr.write('Argument --ts-mon-task-service-name must be '
                       'provided when the target type is "task".\n')
      sys.exit(2)
    if not args.ts_mon_task_job_name:
      sys.stderr.write('Argument --ts-mon-task-job-name must be provided '
                       'when the target type is "task".\n')
      sys.exit(2)
    hostname = args.ts_mon_task_hostname
    if use_autogen_prefix:
      hostname = 'autogen:' + hostname
    interface.state.target = targets.TaskTarget(
        args.ts_mon_task_service_name,
        args.ts_mon_task_job_name,
        args.ts_mon_task_region,
        hostname,
        args.ts_mon_task_number)

  interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix
  # Start from a NullMonitor; it stays in place whenever the endpoint below
  # does not select another monitor.
  interface.state.global_monitor = monitors.NullMonitor()

  if endpoint.startswith('file://'):
    interface.state.global_monitor = monitors.DebugMonitor(
        endpoint[len('file://'):])
  elif endpoint.startswith('pubsub://'):
    if credentials:
      # pubsub://project/topic: the netloc is the project, the path the topic.
      url = urlparse.urlparse(endpoint)
      project = url.netloc
      topic = url.path.strip('/')
      interface.state.global_monitor = monitors.PubSubMonitor(
          credentials, project, topic, use_instrumented_http=True)
    else:
      logging.error('ts_mon monitoring is disabled because credentials are not '
                    'available')
  elif endpoint.startswith('https://'):
    interface.state.global_monitor = monitors.HttpsMonitor(endpoint,
                                                           credentials)
  elif endpoint.lower() == 'none':
    logging.info('ts_mon monitoring has been explicitly disabled')
  else:
    logging.error('ts_mon monitoring is disabled because the endpoint provided'
                  ' is invalid or not supported: %s', endpoint)

  interface.state.flush_mode = args.ts_mon_flush

  if args.ts_mon_flush == 'auto':
    interface.state.flush_thread = interface._FlushThread(
        args.ts_mon_flush_interval_secs)
    interface.state.flush_thread.start()

  standard_metrics.init()

OLD	NEW

« appengine_module/gae_event_mon/__init__.py ('K') | « infra_libs/ts_mon/config.proto ('k') | infra_libs/ts_mon/protos/REAME.md » ('j') | no next file with comments »