Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(184)

Side by Side Diff: infra_libs/event_mon/monitoring.py

Issue 2213143002: Add infra_libs as a bootstrap dependency. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Removed the ugly import hack Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import logging
6
7 from google.protobuf.message import DecodeError
8 from infra_libs.event_mon.protos.chrome_infra_log_pb2 import (
9 ChromeInfraEvent, ServiceEvent, BuildEvent)
10 from infra_libs.event_mon.protos.goma_stats_pb2 import GomaStats
11 from infra_libs.event_mon.protos.log_request_lite_pb2 import LogRequestLite
12 from infra_libs.event_mon import config, router
13
14
# Accepted string values for the enum-like arguments of the functions in this
# module. These constants are part of the API.
EVENT_TYPES = (
    'START',
    'STOP',
    'UPDATE',
    'CURRENT_VERSION',
    'CRASH',
)
BUILD_EVENT_TYPES = (
    'SCHEDULER',
    'BUILD',
    'STEP',
)
BUILD_RESULTS = (
    'UNKNOWN',
    'SUCCESS',
    'FAILURE',
    'INFRA_FAILURE',
    'WARNING',
    'SKIPPED',
    'RETRY',
)
TIMESTAMP_KINDS = (
    'UNKNOWN',
    'POINT',
    'BEGIN',
    'END',
)
GOMA_ERROR_TYPES = (
    'GOMA_ERROR_OK',
    'GOMA_ERROR_UNKNOWN',
    'GOMA_ERROR_CRASHED',
    'GOMA_ERROR_LOG_FATAL',
)

# Upper bound, in characters, on the stack trace attached to an event.
STACK_TRACE_MAX_SIZE = 1000
26
27
class Event(object):
  """Pairs a ChromeInfraEvent proto with the timestamp it should be sent with.

  An Event whose proto is None is called a "null" event: it carries no data
  and its send() method always returns False.
  """

  def __init__(self, timestamp_kind=None,
               event_timestamp_ms=None, service_name=None):
    """Build the underlying proto from the given arguments.

    Args:
      timestamp_kind (string): one of TIMESTAMP_KINDS, or None to leave the
        field as it is in the default event.
      event_timestamp_ms (int or float): time of the event in milliseconds
        from Unix epoch. Default: now.
      service_name (string): name of the monitored service.
    """
    # _get_chrome_infra_event() returns None on invalid input, which makes
    # this instance a null event.
    self._event = _get_chrome_infra_event(
        timestamp_kind, service_name=service_name)
    self._timestamp_ms = event_timestamp_ms

  @staticmethod
  def null():
    """Return a "null" Event, i.e. one without a proto.

    Sending a null event fails (send() returns False). Helper functions use
    this to return the same object type on failure as on success.
    """
    empty = Event()
    empty._event = None
    return empty

  @property
  def proto(self):
    """The wrapped ChromeInfraEvent, or None for a null event."""
    return self._event

  @property
  def is_null(self):
    """True when this event has no proto."""
    return self.proto is None

  def log_event(self):
    """Wrap the proto into a LogEventLite; None for a null event."""
    if not self.is_null:
      return _get_log_event_lite(
          self.proto, event_timestamp=self._timestamp_ms)
    return None

  def send(self):
    """Push this event to the configured router.

    Returns:
      False for a null event, otherwise the value returned by the router's
      push_event().
    """
    if self.proto is None:
      return False
    wrapped = self.log_event()
    return config._router.push_event(wrapped)
74
75
def _get_chrome_infra_event(timestamp_kind, service_name=None):
  """Build a ChromeInfraEvent pre-filled from the cached default event.

  The result starts as a copy of config._cache['default_event'] and is then
  updated with the arguments given here.

  Args:
    timestamp_kind (string): one of TIMESTAMP_KINDS, or None.
    service_name (string): name of the monitored service, or None.

  Returns:
    event (chrome_infra_log_pb2.ChromeInfraEvent): None if an argument failed
      validation (an error is logged in that case).
  """
  # Only None skips the membership test: an empty string must be reported as
  # an invalid value.
  kind_provided = timestamp_kind is not None
  if kind_provided and timestamp_kind not in TIMESTAMP_KINDS:
    logging.error('Invalid value for timestamp_kind: %s', timestamp_kind)
    return None

  # basestring, so that unicode service names are accepted too.
  if not (service_name is None or isinstance(service_name, basestring)):
    logging.error('Invalid type for service_name: %s', type(service_name))
    return None

  infra_event = ChromeInfraEvent()
  infra_event.CopyFrom(config._cache['default_event'])

  if timestamp_kind:
    kind_value = ChromeInfraEvent.TimestampKind.Value(timestamp_kind)
    infra_event.timestamp_kind = kind_value
  if service_name:
    infra_event.event_source.service_name = service_name

  return infra_event
110
111
112 def _get_log_event_lite(chrome_infra_event, event_timestamp=None):
113 """Wraps a ChromeInfraEvent into a LogEventLite.
114
115 Args:
116 event_timestamp (int or float): timestamp of when the event happened
117 as a number of milliseconds since the epoch. If None, the current time
118 is used.
119
120 Returns:
121 log_event (log_request_lite_pb2.LogRequestLite.LogEventLite):
122 """
123 if not isinstance(event_timestamp, (int, float, None.__class__ )):
124 logging.error('Invalid type for event_timestamp. Needs a number, got %s',
125 type(event_timestamp))
126 return None
127
128 log_event = LogRequestLite.LogEventLite()
129 log_event.event_time_ms = int(event_timestamp or router.time_ms())
130 log_event.source_extension = chrome_infra_event.SerializeToString()
131 return log_event
132
133
def _get_service_event(event_type,
                       timestamp_kind=None,
                       event_timestamp=None,
                       code_version=None,
                       stack_trace=None,
                       service_name=None):
  """Compute a ChromeInfraEvent filled with a ServiceEvent.

  Arguments are identical to those in send_service_event(), please refer
  to this docstring. service_name is additionally forwarded to Event.

  Invalid code_version entries and an invalid stack_trace are reported with
  logging and otherwise ignored; they do not make the event null.

  Returns:
    event (Event): can be a "null" Event if there is a major processing issue.
  """
  if event_type not in EVENT_TYPES:
    logging.error('Invalid value for event_type: %s', event_type)
    return Event.null()

  if timestamp_kind is None:
    # Default kind derived from the event type; anything not listed is a
    # point-in-time event.
    timestamp_kind = {
        'START': 'BEGIN',
        'STOP': 'END',
        'CRASH': 'END',
    }.get(event_type, 'POINT')

  event_wrapper = Event(timestamp_kind, event_timestamp, service_name)
  if event_wrapper.is_null:
    return event_wrapper

  event = event_wrapper.proto

  event.service_event.type = getattr(ServiceEvent, event_type)

  if code_version is None:
    code_version = ()
  if not isinstance(code_version, (tuple, list)):
    logging.error('Invalid type provided to code_version argument in '
                  '_get_service_event. Please fix the calling code. '
                  'Type provided: %s, expected list, tuple or None.',
                  type(code_version))
    code_version = ()

  for version_d in code_version:
    try:
      if 'source_url' not in version_d:
        logging.error('source_url missing in %s', version_d)
        continue

      version = event.service_event.code_version.add()
      version.source_url = version_d['source_url']
      if 'revision' in version_d:
        # Rely on the url to switch between svn and git because an
        # abbreviated sha1 can sometimes be confused with an int.
        if version.source_url.startswith('svn://'):
          version.svn_revision = int(version_d['revision'])
        else:
          version.git_hash = version_d['revision']

      if 'version' in version_d:
        version.version = version_d['version']
      if 'dirty' in version_d:
        version.dirty = version_d['dirty']

    except (TypeError, ValueError):
      # ValueError covers a non-numeric svn revision passed to int(); like
      # any other bad entry it must be logged, not crash the caller.
      logging.exception('Invalid type or value provided to code_version '
                        'argument in _get_service_event. Please fix the '
                        'calling code.')
      continue

  if isinstance(stack_trace, basestring):
    if event_type != 'CRASH':
      logging.error('stack_trace provide for an event different from CRASH.'
                    ' Got: %s', event_type)
    # Keep only the tail of the trace.
    event.service_event.stack_trace = stack_trace[-STACK_TRACE_MAX_SIZE:]
  elif stack_trace is not None:
    logging.error('stack_trace should be a string, got %s',
                  stack_trace.__class__.__name__)

  return event_wrapper
214
215
def send_service_event(event_type,
                       timestamp_kind=None,
                       event_timestamp=None,
                       code_version=(),
                       stack_trace=None):
  """Build a service event and send it.

  Args:
    event_type (string): any name of enum ServiceEvent.ServiceEventType.
      ('START', 'STOP', 'UPDATE', 'CURRENT_VERSION', 'CRASH')

  Keyword Args:
    timestamp_kind (string): any of ('POINT', 'BEGIN', 'END').

    event_timestamp (int or float): when the event happened, as a number of
      milliseconds since the epoch. The current time is used when this is
      not provided.

    code_version (list/tuple of dict or None): each dict describes one
      source checkout, with keys
        'source_url' -> full url to the repository (required)
        'revision' -> (string) git sha1 or svn revision number.
        'dirty' -> boolean, optional. True if the local source tree has
                   local modification.
        'version' -> optional manually-set version number (like 'v2.6.0')

    stack_trace (str): when event_type is 'CRASH', stack trace of the crash
      as a string. Only the last 1000 characters are kept. Use
      traceback.format_exc() to get the stack trace from an exception
      handler.

  Returns:
    success (bool): False if some error happened.
  """
  # No service name can be given through this entry point.
  service_event = _get_service_event(
      event_type=event_type,
      timestamp_kind=timestamp_kind,
      service_name=None,
      event_timestamp=event_timestamp,
      code_version=code_version,
      stack_trace=stack_trace)
  return service_event.send()
256
257
def get_build_event(event_type,
                    hostname,
                    build_name,
                    build_number=None,
                    build_scheduling_time=None,
                    step_name=None,
                    step_text=None,
                    step_number=None,
                    result=None,
                    extra_result_code=None,
                    timestamp_kind=None,
                    event_timestamp=None,
                    service_name=None,
                    goma_stats=None,
                    goma_error=None,
                    goma_crash_report_id=None,
                    patch_url=None,
                    bbucket_id=None,
                    category=None):
  """Compute an Event wrapping a ChromeInfraEvent filled with a BuildEvent.

  Arguments are identical to those in send_build_event(), please refer
  to this docstring. service_name is additionally forwarded to Event.

  Missing, invalid or mutually inconsistent arguments are reported with
  logging.error(); the event is still returned with whatever could be filled.

  NOTE(review): goma_error is handed directly to
  BuildEvent.GomaErrorType.Value() without being checked against
  GOMA_ERROR_TYPES first, so an unknown name propagates whatever Value()
  raises instead of being logged like the other arguments. Confirm whether
  callers rely on that before changing it.

  Returns:
    event (Event): a "null" Event if event_type is invalid or the underlying
      event could not be created.
  """
  if event_type not in BUILD_EVENT_TYPES:
    logging.error('Invalid value for event_type: %s', event_type)
    return Event.null()

  event_wrapper = Event(timestamp_kind, event_timestamp,
                        service_name=service_name)
  if event_wrapper.is_null:
    return event_wrapper

  build_event = event_wrapper.proto.build_event
  build_event.type = BuildEvent.BuildEventType.Value(event_type)

  # HasField() is checked after the conditional assignment because the field
  # may already hold a value before this function sets it.
  if hostname:
    build_event.host_name = hostname
  if not build_event.HasField('host_name'):
    logging.error('hostname must be provided, got %s', hostname)

  if build_name:
    build_event.build_name = build_name
  if not build_event.HasField('build_name'):
    logging.error('build_name must be provided, got %s', build_name)

  # 0 is a valid value for build_number
  if build_number is not None:
    build_event.build_number = build_number

  # 0 is not a valid scheduling time
  if build_scheduling_time:
    build_event.build_scheduling_time_ms = build_scheduling_time

  _log_build_inconsistencies(build_event, event_type, build_number)

  if step_name:
    build_event.step_name = str(step_name)
  if step_text:
    build_event.step_text = str(step_text)

  if step_number is not None:
    build_event.step_number = step_number
  if patch_url is not None:
    build_event.patch_url = patch_url
  if bbucket_id is not None:
    try:
      build_event.bbucket_id = int(bbucket_id)
    except (ValueError, TypeError):
      # Best effort: the field is left unset, but the bad value is reported
      # instead of being dropped silently.
      logging.error('Invalid value for bbucket_id: %r', bbucket_id)

  if category:
    build_event.category = {
        'cq': BuildEvent.CATEGORY_CQ,
        'cq_experimental': BuildEvent.CATEGORY_CQ_EXPERIMENTAL,
        'git_cl_try': BuildEvent.CATEGORY_GIT_CL_TRY,
    }.get(category.lower(), BuildEvent.CATEGORY_UNKNOWN)

  _log_step_inconsistencies(build_event, event_type, step_name)
  _set_build_result(build_event, event_type, result)
  _set_extra_result_codes(build_event, extra_result_code)
  _set_goma_fields(build_event, goma_stats, goma_error, goma_crash_report_id)

  return event_wrapper


def _log_build_inconsistencies(build_event, event_type, build_number):
  """Log an error when build_number and scheduling time do not go together."""
  has_scheduling_time = build_event.HasField('build_scheduling_time_ms')
  if build_event.HasField('build_number'):
    if event_type == 'SCHEDULER':
      logging.error('build_number should not be provided for a "SCHEDULER"'
                    ' type, got %s (drop or use BUILD or STEP type)',
                    build_number)

    if not has_scheduling_time:
      logging.error('build_number has been provided (%s), '
                    'build_scheduling_time was not. '
                    'Provide either both or none.',
                    build_event.build_number)
  elif has_scheduling_time:
    logging.error('build_number has not been provided, '
                  'build_scheduling_time was provided (%s). '
                  'Both must be present or missing.',
                  build_event.build_scheduling_time_ms)


def _log_step_inconsistencies(build_event, event_type, step_name):
  """Log an error when step fields are incomplete or used on a non-STEP."""
  if build_event.step_name:
    if event_type != 'STEP':
      logging.error('step_name should be provided only for type "STEP", '
                    'got %s', event_type)
    if not build_event.HasField('step_number'):
      logging.error('step_number was not provided, but got a value for '
                    'step_name (%s). Provide either both or none',
                    step_name)
    if (not build_event.HasField('build_number')
        and not build_event.HasField('build_scheduling_time_ms')):
      # The message names the two fields that are actually tested above.
      logging.error('build information must be provided when step '
                    'information is provided. Got nothing in build_number '
                    'and build_scheduling_time')
  elif build_event.HasField('step_number'):
    logging.error('step_number has been provided (%s), '
                  'step_name has not. '
                  'Both must be present or missing.',
                  build_event.step_number)


def _set_build_result(build_event, event_type, result):
  """Validate `result` and store it in the proto, logging on bad input."""
  # TODO(pgervais) remove this.
  # Hack to work around errors in the proto
  mapping = {'WARNINGS': 'WARNING', 'EXCEPTION': 'INFRA_FAILURE'}
  result = mapping.get(result, result)

  if result is None:  # we want an error message if result==''.
    return
  if result not in BUILD_RESULTS:
    logging.error('Invalid value for result: %s', result)
    return

  build_event.result = getattr(BuildEvent, result)

  if event_type == 'SCHEDULER':
    # The non-SCHEDULER build event types are BUILD and STEP.
    logging.error('A result was provided for a "SCHEDULER" event type '
                  '(%s). This is only accepted for BUILD and STEP types.',
                  result)


def _set_extra_result_codes(build_event, extra_result_code):
  """Append the string entries of `extra_result_code` to the proto."""
  if isinstance(extra_result_code, basestring):
    extra_result_code = (extra_result_code, )
  if not isinstance(extra_result_code, (list, tuple)):
    if extra_result_code is not None:
      logging.error('extra_result_code must be a string or list of strings. '
                    'Got %s', type(extra_result_code))
    return

  non_strings = []
  extra_result_strings = []
  for s in extra_result_code:
    if isinstance(s, basestring):
      extra_result_strings.append(s)
    else:
      non_strings.append(s)

  # Non-string entries are reported and dropped; the strings are still kept.
  if non_strings:
    logging.error('some values provided to extra_result_code are not strings:'
                  ' %s', non_strings)
  for s in extra_result_strings:
    build_event.extra_result_code.append(s)


def _set_goma_fields(build_event, goma_stats, goma_error,
                     goma_crash_report_id):
  """Store the Goma stats / error / crash report id in the proto."""
  if goma_stats:
    if isinstance(goma_stats, GomaStats):
      build_event.goma_stats.MergeFrom(goma_stats)
    else:
      logging.error('expected goma_stats to be an instance of GomaStats, '
                    'got %s', type(goma_stats))
  if goma_error:
    if goma_stats:
      logging.error('Only one of goma_error and goma_stats can be provided. '
                    'Got %s and %s.', goma_error, goma_stats)
    build_event.goma_error = BuildEvent.GomaErrorType.Value(goma_error)
  if goma_crash_report_id:
    build_event.goma_crash_report_id = goma_crash_report_id
    if goma_error != 'GOMA_ERROR_CRASHED':
      logging.error('A crash report id (%s) was provided for GomaErrorType '
                    '(%s). This is only accepted for GOMA_ERROR_CRASHED '
                    'type.', goma_crash_report_id, goma_error)
433
434
def send_build_event(event_type,
                     hostname,
                     build_name,
                     build_number=None,
                     build_scheduling_time=None,
                     step_name=None,
                     step_text=None,
                     step_number=None,
                     result=None,
                     extra_result_code=None,
                     timestamp_kind=None,
                     event_timestamp=None,
                     goma_stats=None,
                     goma_error=None,
                     goma_crash_report_id=None,
                     patch_url=None,
                     bbucket_id=None,
                     category=None):
  """Build a ChromeInfraEvent filled with a BuildEvent and send it.

  Args:
    event_type (string): any name of enum BuildEvent.BuildEventType.
      (listed in infra_libs.event_mon.monitoring.BUILD_EVENT_TYPES)
    hostname (string): fqdn of the machine that is running the build / step.
      aka the bot name.
    build_name (string): name of the builder.

  Keyword args:
    build_number (int): as the name says.
    build_scheduling_time (int): timestamp telling when the build was
      scheduled. This is required when build_number is provided to make it
      possible to distinguish two builds with the same build number.
    step_name (str): name of the step.
    step_text (str): text of the step.
    step_number (int): rank of the step in the build. This is mandatory
      if step_name is provided, because step_name is not enough to tell the
      order.
    result (string): any name of enum BuildEvent.BuildResult.
      (listed in infra_libs.event_mon.monitoring.BUILD_RESULTS)
    extra_result_code (string or list of): arbitrary strings intended to
      provide more fine-grained information about the result.
    timestamp_kind (string): forwarded unchanged to get_build_event().
    event_timestamp (int or float): forwarded unchanged to get_build_event().
    goma_stats (goma_stats_pb2.GomaStats): statistics output by the Goma
      proxy.
    goma_error (string): goma error type defined as GomaErrorType.
    goma_crash_report_id (string): id of goma crash report.
    patch_url (string): URL of the patch that triggered build
    bbucket_id (long): Buildbucket ID of the build.
    category (string): Build category, e.g. cq or git_cl_try.

  Returns:
    success (bool): False if some error happened.
  """
  build_event = get_build_event(
      event_type,
      hostname,
      build_name,
      build_number=build_number,
      build_scheduling_time=build_scheduling_time,
      step_name=step_name,
      step_text=step_text,
      step_number=step_number,
      result=result,
      extra_result_code=extra_result_code,
      timestamp_kind=timestamp_kind,
      event_timestamp=event_timestamp,
      goma_stats=goma_stats,
      goma_error=goma_error,
      goma_crash_report_id=goma_crash_report_id,
      patch_url=patch_url,
      bbucket_id=bbucket_id,
      category=category)
  return build_event.send()
504
505
def send_events(events):
  """Send several events to the endpoint in a single push.

  Args:
    events (iterable of Event): events to send

  Return:
    success (bool): True if data was successfully received by the endpoint.
  """
  # Each Event is converted with log_event(); a null Event contributes None.
  log_events = tuple([event.log_event() for event in events])
  return config._router.push_event(log_events)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698