OLD | NEW |
| (Empty) |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import logging | |
6 | |
7 from google.protobuf.message import DecodeError | |
8 from infra_libs.event_mon.protos.chrome_infra_log_pb2 import ( | |
9 ChromeInfraEvent, ServiceEvent, BuildEvent) | |
10 from infra_libs.event_mon.protos.goma_stats_pb2 import GomaStats | |
11 from infra_libs.event_mon.protos.log_request_lite_pb2 import LogRequestLite | |
12 from infra_libs.event_mon import config, router | |
13 | |
14 | |
# Public API: callers may rely on these tuples to know which string names
# the helpers in this module accept.
EVENT_TYPES = (
    'START',
    'STOP',
    'UPDATE',
    'CURRENT_VERSION',
    'CRASH',
)
BUILD_EVENT_TYPES = (
    'SCHEDULER',
    'BUILD',
    'STEP',
)
BUILD_RESULTS = (
    'UNKNOWN',
    'SUCCESS',
    'FAILURE',
    'INFRA_FAILURE',
    'WARNING',
    'SKIPPED',
    'RETRY',
)
TIMESTAMP_KINDS = (
    'UNKNOWN',
    'POINT',
    'BEGIN',
    'END',
)
GOMA_ERROR_TYPES = (
    'GOMA_ERROR_OK',
    'GOMA_ERROR_UNKNOWN',
    'GOMA_ERROR_CRASHED',
    'GOMA_ERROR_LOG_FATAL',
)

# Upper bound, in characters, on the stack trace attached to an event.
STACK_TRACE_MAX_SIZE = 1000
26 | |
27 | |
class Event(object):
  """Wraps the event proto with the necessary boilerplate code.

  An Event whose proto is None is a "null" Event: its log_event() returns
  None and its send() returns False.
  """

  def __init__(self, timestamp_kind=None,
               event_timestamp_ms=None, service_name=None):
    """Build the underlying ChromeInfraEvent.

    If timestamp_kind or service_name is rejected by
    _get_chrome_infra_event(), the resulting Event is a null Event.

    Args:
      timestamp_kind (string): any of TIMESTAMP_KINDS ('UNKNOWN', 'POINT',
        'BEGIN' or 'END'), or None.
      event_timestamp_ms (int or float): time of the event in milliseconds
        from Unix epoch. Default: now.
      service_name (string): name of the monitored service.
    """
    self._timestamp_ms = event_timestamp_ms
    self._event = _get_chrome_infra_event(
        timestamp_kind, service_name=service_name)

  @property
  def is_null(self):
    """True when this Event carries no proto."""
    return self.proto is None

  @staticmethod
  def null():
    """Create a "null" Event, without the proto.

    Null event's send() method will fail (return False). This is useful for
    returning a consistent object type from helper functions even in the
    case of failure.
    """
    event = Event()
    event._event = None
    return event

  @property
  def proto(self):
    """The wrapped ChromeInfraEvent, or None for a null Event."""
    return self._event

  def log_event(self):
    """Wrap the proto into a LogEventLite.

    Returns:
      log_event (LogRequestLite.LogEventLite): None for a null Event, or
        when _get_log_event_lite() rejects the stored timestamp.
    """
    if self.is_null:
      return None
    return _get_log_event_lite(
        self.proto, event_timestamp=self._timestamp_ms)

  def send(self):
    """Push this event to the configured router.

    Returns:
      success (bool): False for a null Event or when the event could not be
        wrapped into a log event; otherwise the value returned by the
        router's push_event().
    """
    if self.is_null:
      return False
    log_event = self.log_event()
    # log_event() has already logged the reason; do not hand None to the
    # router.
    if log_event is None:
      return False
    return config._router.push_event(log_event)
74 | |
75 | |
def _get_chrome_infra_event(timestamp_kind, service_name=None):
  """Build a ChromeInfraEvent pre-filled from the cached default event.

  The inputs are validated first; on any failure an error is logged and
  None is returned. Otherwise the proto starts as a copy of
  config._cache['default_event'] and is then updated from the arguments.

  Args:
    timestamp_kind (string): any of TIMESTAMP_KINDS, or None.
    service_name (string): name of the monitored service, or None.

  Returns:
    event (chrome_infra_log_pb2.ChromeInfraEvent): None if a check failed.
  """
  # Only None skips the membership test: an empty string must be reported
  # as an invalid value.
  kind_is_acceptable = (
      timestamp_kind is None or timestamp_kind in TIMESTAMP_KINDS)
  if not kind_is_acceptable:
    logging.error('Invalid value for timestamp_kind: %s', timestamp_kind)
    return None

  # basestring, so that unicode service names are accepted as well.
  if not (service_name is None or isinstance(service_name, basestring)):
    logging.error('Invalid type for service_name: %s', type(service_name))
    return None

  infra_event = ChromeInfraEvent()
  infra_event.CopyFrom(config._cache['default_event'])

  # Falsy values (None, '') leave the defaults copied above untouched.
  if service_name:
    infra_event.event_source.service_name = service_name
  if timestamp_kind:
    infra_event.timestamp_kind = ChromeInfraEvent.TimestampKind.Value(
        timestamp_kind)

  return infra_event
110 | |
111 | |
112 def _get_log_event_lite(chrome_infra_event, event_timestamp=None): | |
113 """Wraps a ChromeInfraEvent into a LogEventLite. | |
114 | |
115 Args: | |
116 event_timestamp (int or float): timestamp of when the event happened | |
117 as a number of milliseconds since the epoch. If None, the current time | |
118 is used. | |
119 | |
120 Returns: | |
121 log_event (log_request_lite_pb2.LogRequestLite.LogEventLite): | |
122 """ | |
123 if not isinstance(event_timestamp, (int, float, None.__class__ )): | |
124 logging.error('Invalid type for event_timestamp. Needs a number, got %s', | |
125 type(event_timestamp)) | |
126 return None | |
127 | |
128 log_event = LogRequestLite.LogEventLite() | |
129 log_event.event_time_ms = int(event_timestamp or router.time_ms()) | |
130 log_event.source_extension = chrome_infra_event.SerializeToString() | |
131 return log_event | |
132 | |
133 | |
def _get_service_event(event_type,
                       timestamp_kind=None,
                       event_timestamp=None,
                       code_version=None,
                       stack_trace=None,
                       service_name=None):
  """Compute a ChromeInfraEvent filled with a ServiceEvent.

  Arguments are identical to those in send_service_event(), please refer
  to this docstring. In addition, service_name (string) is the name of the
  monitored service, forwarded to Event().

  Invalid code_version entries and an invalid stack_trace are logged and
  skipped; they do not make the returned Event null.

  Returns:
    event (Event): can be a "null" Event if there is a major processing issue.
  """
  if event_type not in EVENT_TYPES:
    logging.error('Invalid value for event_type: %s', event_type)
    return Event.null()

  if timestamp_kind is None:
    # Derive the kind from the event type; anything not listed is a POINT.
    timestamp_kind = {
        'START': 'BEGIN',
        'STOP': 'END',
        'CRASH': 'END',
    }.get(event_type, 'POINT')

  event_wrapper = Event(timestamp_kind, event_timestamp, service_name)
  if event_wrapper.is_null:
    return event_wrapper

  event = event_wrapper.proto

  event.service_event.type = getattr(ServiceEvent, event_type)

  if code_version is None:
    code_version = ()
  if not isinstance(code_version, (tuple, list)):
    logging.error('Invalid type provided to code_version argument in '
                  '_get_service_event. Please fix the calling code. '
                  'Type provided: %s, expected list, tuple or None.',
                  type(code_version))
    code_version = ()

  for version_d in code_version:
    try:
      if 'source_url' not in version_d:
        logging.error('source_url missing in %s', version_d)
        continue

      version = event.service_event.code_version.add()
      version.source_url = version_d['source_url']
      if 'revision' in version_d:
        # Rely on the url to switch between svn and git because an
        # abbreviated sha1 can sometimes be confused with an int.
        if version.source_url.startswith('svn://'):
          version.svn_revision = int(version_d['revision'])
        else:
          version.git_hash = version_d['revision']

      if 'version' in version_d:
        version.version = version_d['version']
      if 'dirty' in version_d:
        version.dirty = version_d['dirty']

    except TypeError:
      logging.exception('Invalid type provided to code_version argument in '
                        '_get_service_event. Please fix the calling code.')
      continue
    except ValueError:
      # E.g. int() on a non-numeric svn revision. Bad caller data must be
      # reported, not crash the monitored service.
      logging.exception('Invalid value provided to code_version argument in '
                        '_get_service_event. Please fix the calling code.')
      continue

  if isinstance(stack_trace, basestring):
    if event_type != 'CRASH':
      logging.error('stack_trace provide for an event different from CRASH.'
                    ' Got: %s', event_type)
    # Keep only the tail of the trace: that is where the error itself is.
    event.service_event.stack_trace = stack_trace[-STACK_TRACE_MAX_SIZE:]
  else:
    if stack_trace is not None:
      logging.error('stack_trace should be a string, got %s',
                    stack_trace.__class__.__name__)

  return event_wrapper
214 | |
215 | |
def send_service_event(event_type,
                       timestamp_kind=None,
                       event_timestamp=None,
                       code_version=(),
                       stack_trace=None):
  """Build a service event and send it.

  Args:
    event_type (string): any name of enum ServiceEvent.ServiceEventType,
      as listed in EVENT_TYPES.
      ('START', 'STOP', 'UPDATE', 'CURRENT_VERSION', 'CRASH')

  Keyword Args:
    timestamp_kind (string): any of ('POINT', 'BEGIN', 'END'). When None,
      it is derived from event_type by _get_service_event().

    event_timestamp (int or float): timestamp of when the event happened
      as a number of milliseconds since the epoch. If not provided, the
      current time is used.

    code_version (list/tuple of dict or None): one dict per repository.
      Entries without 'source_url' are logged and skipped. Recognized keys:
        'source_url' -> full url to the repository
        'revision' -> (string) git sha1 or svn revision number.
        'dirty' -> boolean. True if the local source tree has local
          modification.
        'version' -> manually-set version number (like 'v2.6.0')

    stack_trace (str): when event_type is 'CRASH', stack trace of the crash
      as a string. Only the last STACK_TRACE_MAX_SIZE (1000) characters are
      kept. Use traceback.format_exc() to get the stack trace from an
      exception handler.

  Returns:
    success (bool): False if some error happened.
  """
  service_event = _get_service_event(
      event_type=event_type,
      timestamp_kind=timestamp_kind,
      event_timestamp=event_timestamp,
      code_version=code_version,
      stack_trace=stack_trace,
      service_name=None)
  return service_event.send()
256 | |
257 | |
def get_build_event(event_type,
                    hostname,
                    build_name,
                    build_number=None,
                    build_scheduling_time=None,
                    step_name=None,
                    step_text=None,
                    step_number=None,
                    result=None,
                    extra_result_code=None,
                    timestamp_kind=None,
                    event_timestamp=None,
                    service_name=None,
                    goma_stats=None,
                    goma_error=None,
                    goma_crash_report_id=None,
                    patch_url=None,
                    bbucket_id=None,
                    category=None):
  """Compute an Event wrapping a ChromeInfraEvent filled with a BuildEvent.

  Arguments are identical to those in send_build_event(), please refer
  to this docstring. In addition, service_name (string) is the name of the
  monitored service, forwarded to Event().

  Invalid or inconsistent arguments are reported through logging.error().
  Only an invalid event_type, or arguments rejected by Event() itself,
  produce a null Event.

  Returns:
    event (Event): can be a "null" Event if there is a major processing
      issue.
  """
  if event_type not in BUILD_EVENT_TYPES:
    logging.error('Invalid value for event_type: %s', event_type)
    return Event.null()

  event_wrapper = Event(timestamp_kind, event_timestamp,
                        service_name=service_name)
  if event_wrapper.is_null:
    return event_wrapper

  event = event_wrapper.proto
  event.build_event.type = BuildEvent.BuildEventType.Value(event_type)

  # HasField() is checked after the assignment, not the argument itself:
  # the field may already be set in the proto built by Event().
  if hostname:
    event.build_event.host_name = hostname
  if not event.build_event.HasField('host_name'):
    logging.error('hostname must be provided, got %s', hostname)

  if build_name:
    event.build_event.build_name = build_name
  if not event.build_event.HasField('build_name'):
    logging.error('build_name must be provided, got %s', build_name)

  # 0 is a valid value for build_number
  if build_number is not None:
    event.build_event.build_number = build_number

  # 0 is not a valid scheduling time
  if build_scheduling_time:
    event.build_event.build_scheduling_time_ms = build_scheduling_time

  if event.build_event.HasField('build_number'):
    if event_type == 'SCHEDULER':
      logging.error('build_number should not be provided for a "SCHEDULER"'
                    ' type, got %s (drop or use BUILD or STEP type)',
                    build_number)

    if not event.build_event.HasField('build_scheduling_time_ms'):
      logging.error('build_number has been provided (%s), '
                    'build_scheduling_time was not. '
                    'Provide either both or none.',
                    event.build_event.build_number)
  else:  # no 'build_number' field
    if event.build_event.HasField('build_scheduling_time_ms'):
      logging.error('build_number has not been provided, '
                    'build_scheduling_time was provided (%s). '
                    'Both must be present or missing.',
                    event.build_event.build_scheduling_time_ms)

  if step_name:
    event.build_event.step_name = str(step_name)
  if step_text:
    event.build_event.step_text = str(step_text)

  if step_number is not None:
    event.build_event.step_number = step_number
  if patch_url is not None:
    event.build_event.patch_url = patch_url
  if bbucket_id is not None:
    try:
      event.build_event.bbucket_id = int(bbucket_id)
    except (ValueError, TypeError):
      # Best effort: the field stays unset, but the bad value is reported
      # instead of being dropped silently.
      logging.error('Invalid value for bbucket_id: %s', bbucket_id)

  if category:
    # Unrecognized categories map to CATEGORY_UNKNOWN.
    event.build_event.category = {
        'cq': BuildEvent.CATEGORY_CQ,
        'cq_experimental': BuildEvent.CATEGORY_CQ_EXPERIMENTAL,
        'git_cl_try': BuildEvent.CATEGORY_GIT_CL_TRY,
    }.get(category.lower(), BuildEvent.CATEGORY_UNKNOWN)

  if event.build_event.step_name:
    if event_type != 'STEP':
      logging.error('step_name should be provided only for type "STEP", '
                    'got %s', event_type)
    if not event.build_event.HasField('step_number'):
      logging.error('step_number was not provided, but got a value for '
                    'step_name (%s). Provide either both or none',
                    step_name)
    if (not event.build_event.HasField('build_number')
        and not event.build_event.HasField('build_scheduling_time_ms')):
      logging.error('build information must be provided when step '
                    'information is provided. Got nothing in build_name '
                    'and build_number')
  else:
    if event.build_event.HasField('step_number'):
      logging.error('step_number has been provided (%s), '
                    'step_name has not. '
                    'Both must be present or missing.',
                    event.build_event.step_number)

  # TODO(pgervais) remove this.
  # Hack to work around errors in the proto
  mapping = {'WARNINGS': 'WARNING', 'EXCEPTION': 'INFRA_FAILURE'}
  result = mapping.get(result, result)

  if result is not None:  # we want an error message if result==''.
    if result not in BUILD_RESULTS:
      logging.error('Invalid value for result: %s', result)
    else:
      event.build_event.result = getattr(BuildEvent, result)

    if event_type == 'SCHEDULER':
      logging.error('A result was provided for a "SCHEDULER" event type '
                    '(%s). This is only accepted for BUILD and TEST types.',
                    result)

  # A single string is accepted as shorthand for a one-element list.
  if isinstance(extra_result_code, basestring):
    extra_result_code = (extra_result_code, )
  if not isinstance(extra_result_code, (list, tuple)):
    if extra_result_code is not None:
      logging.error('extra_result_code must be a string or list of strings. '
                    'Got %s', type(extra_result_code))
  else:
    non_strings = []
    extra_result_strings = []
    for s in extra_result_code:
      if not isinstance(s, basestring):
        non_strings.append(s)
      else:
        extra_result_strings.append(s)

    if non_strings:
      logging.error('some values provided to extra_result_code are not strings:'
                    ' %s', non_strings)
    # The valid strings are still recorded when others were rejected.
    for s in extra_result_strings:
      event.build_event.extra_result_code.append(s)

  if goma_stats:
    if isinstance(goma_stats, GomaStats):
      event.build_event.goma_stats.MergeFrom(goma_stats)
    else:
      logging.error('expected goma_stats to be an instance of GomaStats, '
                    'got %s', type(goma_stats))
  if goma_error:
    if goma_stats:
      logging.error('Only one of goma_error and goma_stats can be provided. '
                    'Got %s and %s.', goma_error, goma_stats)
    try:
      event.build_event.goma_error = BuildEvent.GomaErrorType.Value(goma_error)
    except (ValueError, TypeError):
      # Unknown enum name: report it like every other invalid argument
      # instead of letting the exception escape to the caller.
      logging.error('Invalid value for goma_error: %s', goma_error)
    if goma_crash_report_id:
      event.build_event.goma_crash_report_id = goma_crash_report_id
      if goma_error != 'GOMA_ERROR_CRASHED':
        logging.error('A crash report id (%s) was provided for GomaErrorType '
                      '(%s). This is only accepted for GOMA_ERROR_CRASHED '
                      'type.', goma_crash_report_id, goma_error)

  return event_wrapper
433 | |
434 | |
def send_build_event(event_type,
                     hostname,
                     build_name,
                     build_number=None,
                     build_scheduling_time=None,
                     step_name=None,
                     step_text=None,
                     step_number=None,
                     result=None,
                     extra_result_code=None,
                     timestamp_kind=None,
                     event_timestamp=None,
                     goma_stats=None,
                     goma_error=None,
                     goma_crash_report_id=None,
                     patch_url=None,
                     bbucket_id=None,
                     category=None):
  """Build a ChromeInfraEvent filled with a BuildEvent, and send it.

  The event is built by get_build_event() from the same arguments.

  Args:
    event_type (string): any name of enum BuildEvent.BuildEventType.
      (listed in infra_libs.event_mon.monitoring.BUILD_EVENT_TYPES)
    hostname (string): fqdn of the machine that is running the build / step.
      aka the bot name.
    build_name (string): name of the builder.

  Keyword args:
    build_number (int): as the name says.
    build_scheduling_time (int): timestamp telling when the build was
      scheduled. This is required when build_number is provided to make it
      possible to distinguish two builds with the same build number.
    step_name (str): name of the step.
    step_text (str): text of the step.
    step_number (int): rank of the step in the build. This is mandatory
      if step_name is provided, because step_name is not enough to tell the
      order.
    result (string): any name of enum BuildEvent.BuildResult.
      (listed in infra_libs.event_mon.monitoring.BUILD_RESULTS)
    extra_result_code (string or list of): arbitrary strings intended to
      provide more fine-grained information about the result.
    timestamp_kind (string): any of ('POINT', 'BEGIN', 'END').
    event_timestamp (int or float): timestamp of when the event happened
      as a number of milliseconds since the epoch. If not provided, the
      current time is used.
    goma_stats (goma_stats_pb2.GomaStats): statistics output by the Goma
      proxy.
    goma_error (string): goma error type defined as GomaErrorType.
    goma_crash_report_id (string): id of goma crash report.
    patch_url (string): URL of the patch that triggered build
    bbucket_id (long): Buildbucket ID of the build.
    category (string): Build category, e.g. cq or git_cl_try.

  Returns:
    success (bool): False if some error happened.
  """
  build_event = get_build_event(
      event_type,
      hostname,
      build_name,
      build_number=build_number,
      build_scheduling_time=build_scheduling_time,
      step_name=step_name,
      step_text=step_text,
      step_number=step_number,
      result=result,
      extra_result_code=extra_result_code,
      timestamp_kind=timestamp_kind,
      event_timestamp=event_timestamp,
      goma_stats=goma_stats,
      goma_error=goma_error,
      goma_crash_report_id=goma_crash_report_id,
      patch_url=patch_url,
      bbucket_id=bbucket_id,
      category=category)
  return build_event.send()
504 | |
505 | |
def send_events(events):
  """Send several events at once to the endpoint.

  Each event is converted with its log_event() method and the whole batch
  is handed to the router in a single push_event() call. A null Event
  contributes a None entry to that batch.

  Args:
    events (iterable of Event): events to send

  Return:
    success (bool): value returned by the router's push_event(); True is
      expected to mean the data was successfully received by the endpoint.
  """
  log_events = tuple(event.log_event() for event in events)
  return config._router.push_event(log_events)
OLD | NEW |