| OLD | NEW |
| (Empty) |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import logging | |
| 6 | |
| 7 from google.protobuf.message import DecodeError | |
| 8 from infra_libs.event_mon.protos.chrome_infra_log_pb2 import ( | |
| 9 ChromeInfraEvent, ServiceEvent, BuildEvent) | |
| 10 from infra_libs.event_mon.protos.goma_stats_pb2 import GomaStats | |
| 11 from infra_libs.event_mon.protos.log_request_lite_pb2 import LogRequestLite | |
| 12 from infra_libs.event_mon import config, router | |
| 13 | |
| 14 | |
# The constants below are part of this module's API.
EVENT_TYPES = (
  'START',
  'STOP',
  'UPDATE',
  'CURRENT_VERSION',
  'CRASH',
)
BUILD_EVENT_TYPES = (
  'SCHEDULER',
  'BUILD',
  'STEP',
)
BUILD_RESULTS = (
  'UNKNOWN',
  'SUCCESS',
  'FAILURE',
  'INFRA_FAILURE',
  'WARNING',
  'SKIPPED',
  'RETRY',
)
TIMESTAMP_KINDS = (
  'UNKNOWN',
  'POINT',
  'BEGIN',
  'END',
)
GOMA_ERROR_TYPES = (
  'GOMA_ERROR_OK',
  'GOMA_ERROR_UNKNOWN',
  'GOMA_ERROR_CRASHED',
  'GOMA_ERROR_LOG_FATAL',
)

# Stack traces attached to an event are cut down to this many characters.
STACK_TRACE_MAX_SIZE = 1000
| 26 | |
| 27 | |
class Event(object):
  """Pairs a ChromeInfraEvent proto with the timestamp it is sent with."""

  def __init__(self, timestamp_kind=None,
               event_timestamp_ms=None, service_name=None):
    """
    Args:
      timestamp_kind (string): a member of TIMESTAMP_KINDS, e.g. 'POINT',
        'BEGIN' or 'END'. An invalid value leaves this Event without a
        proto (is_null is then True).
      event_timestamp_ms (int or float): time of the event in milliseconds
        from Unix epoch. Default: now.
      service_name (string): name of the monitored service.
    """
    self._event = _get_chrome_infra_event(
      timestamp_kind, service_name=service_name)
    self._timestamp_ms = event_timestamp_ms

  @property
  def proto(self):
    """The wrapped proto, or None for a "null" Event."""
    return self._event

  @property
  def is_null(self):
    """True when this Event carries no proto."""
    return self.proto is None

  @staticmethod
  def null():
    """Create a "null" Event, i.e. one without a proto.

    The send() method of a null event fails (returns False). Helper
    functions use this to return the same object type whether or not they
    succeeded.
    """
    null_event = Event()
    null_event._event = None
    return null_event

  def log_event(self):
    """Return the proto wrapped in a LogEventLite, or None for a null Event."""
    if not self.is_null:
      return _get_log_event_lite(
        self.proto, event_timestamp=self._timestamp_ms)
    return None

  def send(self):
    """Push this event to the configured router.

    Returns:
      False for a null Event, otherwise whatever the router's push_event()
      returns.
    """
    if self.is_null:
      return False
    wrapped = self.log_event()
    return config._router.push_event(wrapped)
| 74 | |
| 75 | |
def _get_chrome_infra_event(timestamp_kind, service_name=None):
  """Build the base ChromeInfraEvent shared by every event type.

  The inputs are validated first; when a check fails, an error is logged and
  None is returned. Otherwise the proto starts as a copy of the default event
  stored in config._cache (filled from the values given to
  setup_monitoring() at initialization time) and is completed with the
  arguments.

  Args:
    timestamp_kind (string): any of ('POINT', 'BEGIN', 'END'), or None to
      leave the field untouched.
    service_name (string): name of the monitored service, or None to keep
      the value coming from the default event.

  Returns:
    event (chrome_infra_log_pb2.ChromeInfraEvent): None if validation failed.
  """
  # None is tested explicitly (instead of truthiness) so that an empty string
  # is reported as an invalid value.
  kind_is_valid = timestamp_kind is None or timestamp_kind in TIMESTAMP_KINDS
  if not kind_is_valid:
    logging.error('Invalid value for timestamp_kind: %s', timestamp_kind)
    return None

  # basestring, because unicode service names have to be accepted too.
  name_is_valid = service_name is None or isinstance(service_name, basestring)
  if not name_is_valid:
    logging.error('Invalid type for service_name: %s', type(service_name))
    return None

  base_event = ChromeInfraEvent()
  base_event.CopyFrom(config._cache['default_event'])

  if timestamp_kind:
    kind_value = ChromeInfraEvent.TimestampKind.Value(timestamp_kind)
    base_event.timestamp_kind = kind_value
  if service_name:
    base_event.event_source.service_name = service_name

  return base_event
| 110 | |
| 111 | |
| 112 def _get_log_event_lite(chrome_infra_event, event_timestamp=None): | |
| 113 """Wraps a ChromeInfraEvent into a LogEventLite. | |
| 114 | |
| 115 Args: | |
| 116 event_timestamp (int or float): timestamp of when the event happened | |
| 117 as a number of milliseconds since the epoch. If None, the current time | |
| 118 is used. | |
| 119 | |
| 120 Returns: | |
| 121 log_event (log_request_lite_pb2.LogRequestLite.LogEventLite): | |
| 122 """ | |
| 123 if not isinstance(event_timestamp, (int, float, None.__class__ )): | |
| 124 logging.error('Invalid type for event_timestamp. Needs a number, got %s', | |
| 125 type(event_timestamp)) | |
| 126 return None | |
| 127 | |
| 128 log_event = LogRequestLite.LogEventLite() | |
| 129 log_event.event_time_ms = int(event_timestamp or router.time_ms()) | |
| 130 log_event.source_extension = chrome_infra_event.SerializeToString() | |
| 131 return log_event | |
| 132 | |
| 133 | |
def _get_service_event(event_type,
                       timestamp_kind=None,
                       event_timestamp=None,
                       code_version=None,
                       stack_trace=None,
                       service_name=None):
  """Compute an Event whose proto is filled with a ServiceEvent.

  Arguments are identical to those in send_service_event(), please refer
  to this docstring. service_name is forwarded to the Event constructor.

  Invalid entries in code_version are logged and skipped instead of aborting
  the whole event.

  Returns:
    event (Event): can be a "null" Event if there is a major processing issue.
  """
  if event_type not in EVENT_TYPES:
    logging.error('Invalid value for event_type: %s', event_type)
    return Event.null()

  if timestamp_kind is None:
    # Derive the kind from the event type; anything not listed is a POINT.
    default_kinds = {'START': 'BEGIN', 'STOP': 'END', 'CRASH': 'END'}
    timestamp_kind = default_kinds.get(event_type, 'POINT')

  event_wrapper = Event(timestamp_kind, event_timestamp, service_name)
  if event_wrapper.is_null:
    return event_wrapper

  event = event_wrapper.proto

  event.service_event.type = getattr(ServiceEvent, event_type)

  if code_version is None:
    code_version = ()
  if not isinstance(code_version, (tuple, list)):
    logging.error('Invalid type provided to code_version argument in '
                  '_get_service_event. Please fix the calling code. '
                  'Type provided: %s, expected list, tuple or None.',
                  type(code_version))
    code_version = ()

  for version_d in code_version:
    try:
      if 'source_url' not in version_d:
        logging.error('source_url missing in %s', version_d)
        continue

      version = event.service_event.code_version.add()
      version.source_url = version_d['source_url']
      if 'revision' in version_d:
        # Rely on the url to switch between svn and git because an
        # abbreviated sha1 can sometimes be confused with an int.
        if version.source_url.startswith('svn://'):
          version.svn_revision = int(version_d['revision'])
        else:
          version.git_hash = version_d['revision']

      if 'version' in version_d:
        version.version = version_d['version']
      if 'dirty' in version_d:
        version.dirty = version_d['dirty']

    except TypeError:
      logging.exception('Invalid type provided to code_version argument in '
                        '_get_service_event. Please fix the calling code.')
      continue
    except ValueError:
      # Raised e.g. by int() when an svn revision is not a number. Handled
      # like a type error so that one bad entry does not lose the event.
      logging.exception('Invalid value provided to code_version argument in '
                        '_get_service_event. Please fix the calling code.')
      continue

  if isinstance(stack_trace, basestring):
    if event_type != 'CRASH':
      logging.error('stack_trace provide for an event different from CRASH.'
                    ' Got: %s', event_type)
    # Only the tail of the trace is kept.
    event.service_event.stack_trace = stack_trace[-STACK_TRACE_MAX_SIZE:]
  else:
    if stack_trace is not None:
      logging.error('stack_trace should be a string, got %s',
                    stack_trace.__class__.__name__)

  return event_wrapper
| 214 | |
| 215 | |
def send_service_event(event_type,
                       timestamp_kind=None,
                       event_timestamp=None,
                       code_version=(),
                       stack_trace=None):
  """Build a service event and send it.

  Args:
    event_type (string): any name of enum ServiceEvent.ServiceEventType.
      ('START', 'STOP', 'UPDATE', 'CURRENT_VERSION', 'CRASH')

  Keyword Args:
    timestamp_kind (string): any of ('POINT', 'BEGIN', 'END').

    event_timestamp (int or float): timestamp of when the event happened
      as a number of milliseconds since the epoch. If not provided, the
      current time is used.

    code_version (list/tuple of dict or None): required keys are
        'source_url' -> full url to the repository
        'revision' -> (string) git sha1 or svn revision number.
      optional keys are
        'dirty' -> boolean. True if the local source tree has local
            modification.
        'version' -> manually-set version number (like 'v2.6.0')

    stack_trace (str): when event_type is 'CRASH', stack trace of the crash
      as a string. String is truncated to 1000 characters (the last ones
      are kept). Use traceback.format_exc() to get the stack trace from an
      exception handler.

  Returns:
    success (bool): False if some error happened.
  """
  service_event = _get_service_event(
    event_type=event_type,
    timestamp_kind=timestamp_kind,
    service_name=None,
    event_timestamp=event_timestamp,
    code_version=code_version,
    stack_trace=stack_trace)
  return service_event.send()
| 256 | |
| 257 | |
def get_build_event(event_type,
                    hostname,
                    build_name,
                    build_number=None,
                    build_scheduling_time=None,
                    step_name=None,
                    step_text=None,
                    step_number=None,
                    result=None,
                    extra_result_code=None,
                    timestamp_kind=None,
                    event_timestamp=None,
                    service_name=None,
                    goma_stats=None,
                    goma_error=None,
                    goma_crash_report_id=None,
                    patch_url=None,
                    bbucket_id=None,
                    category=None):
  """Compute an Event whose proto is filled with a BuildEvent.

  Arguments are identical to those in send_build_event(), please refer
  to this docstring. service_name is forwarded to the Event constructor.

  Most inconsistencies between arguments are only reported with
  logging.error(): the event is still built and returned.

  Returns:
    event (Event): a "null" Event if event_type is invalid or if the
      underlying Event could not be created.
  """
  if event_type not in BUILD_EVENT_TYPES:
    logging.error('Invalid value for event_type: %s', event_type)
    return Event.null()

  event_wrapper = Event(timestamp_kind, event_timestamp,
                        service_name=service_name)
  if event_wrapper.is_null:
    return event_wrapper

  event = event_wrapper.proto
  build_event = event.build_event
  build_event.type = BuildEvent.BuildEventType.Value(event_type)

  if hostname:
    build_event.host_name = hostname
  if not build_event.HasField('host_name'):
    logging.error('hostname must be provided, got %s', hostname)

  if build_name:
    build_event.build_name = build_name
  if not build_event.HasField('build_name'):
    logging.error('build_name must be provided, got %s', build_name)

  # 0 is a valid value for build_number
  if build_number is not None:
    build_event.build_number = build_number

  # 0 is not a valid scheduling time
  if build_scheduling_time:
    build_event.build_scheduling_time_ms = build_scheduling_time

  # build_number and build_scheduling_time must come together.
  if build_event.HasField('build_number'):
    if event_type == 'SCHEDULER':
      logging.error('build_number should not be provided for a "SCHEDULER"'
                    ' type, got %s (drop or use BUILD or STEP type)',
                    build_number)

    if not build_event.HasField('build_scheduling_time_ms'):
      logging.error('build_number has been provided (%s), '
                    'build_scheduling_time was not. '
                    'Provide either both or none.',
                    build_event.build_number)
  else:  # no 'build_number' field
    if build_event.HasField('build_scheduling_time_ms'):
      logging.error('build_number has not been provided, '
                    'build_scheduling_time was provided (%s). '
                    'Both must be present or missing.',
                    build_event.build_scheduling_time_ms)

  if step_name:
    build_event.step_name = str(step_name)
  if step_text:
    build_event.step_text = str(step_text)

  if step_number is not None:
    build_event.step_number = step_number
  if patch_url is not None:
    build_event.patch_url = patch_url
  if bbucket_id is not None:
    try:
      build_event.bbucket_id = int(bbucket_id)
    except (ValueError, TypeError):
      # Still best-effort (the field is left unset), but report the problem
      # like every other invalid argument of this function.
      logging.error('Invalid value for bbucket_id, expected an integer: %s',
                    bbucket_id)

  if category:
    # Unrecognized categories are recorded as CATEGORY_UNKNOWN.
    build_event.category = {
      'cq': BuildEvent.CATEGORY_CQ,
      'cq_experimental': BuildEvent.CATEGORY_CQ_EXPERIMENTAL,
      'git_cl_try': BuildEvent.CATEGORY_GIT_CL_TRY,
    }.get(category.lower(), BuildEvent.CATEGORY_UNKNOWN)

  # step_name and step_number must come together, and only for STEP events.
  if build_event.step_name:
    if event_type != 'STEP':
      logging.error('step_name should be provided only for type "STEP", '
                    'got %s', event_type)
    if not build_event.HasField('step_number'):
      logging.error('step_number was not provided, but got a value for '
                    'step_name (%s). Provide either both or none',
                    step_name)
    if (not build_event.HasField('build_number')
        and not build_event.HasField('build_scheduling_time_ms')):
      logging.error('build information must be provided when step '
                    'information is provided. Got nothing in build_name '
                    'and build_number')
  else:
    if build_event.HasField('step_number'):
      logging.error('step_number has been provided (%s), '
                    'step_name has not. '
                    'Both must be present or missing.',
                    build_event.step_number)

  # TODO(pgervais) remove this.
  # Hack to work around errors in the proto
  mapping = {'WARNINGS': 'WARNING', 'EXCEPTION': 'INFRA_FAILURE'}
  result = mapping.get(result, result)

  if result is not None:  # we want an error message if result==''.
    if result not in BUILD_RESULTS:
      logging.error('Invalid value for result: %s', result)
    else:
      build_event.result = getattr(BuildEvent, result)

    if event_type == 'SCHEDULER':
      logging.error('A result was provided for a "SCHEDULER" event type '
                    '(%s). This is only accepted for BUILD and TEST types.',
                    result)

  # A single string is accepted as a shorthand for a one-element list.
  if isinstance(extra_result_code, basestring):
    extra_result_code = (extra_result_code, )
  if not isinstance(extra_result_code, (list, tuple)):
    if extra_result_code is not None:
      logging.error('extra_result_code must be a string or list of strings. '
                    'Got %s', type(extra_result_code))
  else:
    non_strings = []
    extra_result_strings = []
    for s in extra_result_code:
      if not isinstance(s, basestring):
        non_strings.append(s)
      else:
        extra_result_strings.append(s)

    if non_strings:
      logging.error('some values provided to extra_result_code are not strings:'
                    ' %s', str(non_strings))
    # Only the valid (string) codes are recorded.
    for s in extra_result_strings:
      build_event.extra_result_code.append(s)

  if goma_stats:
    if isinstance(goma_stats, GomaStats):
      build_event.goma_stats.MergeFrom(goma_stats)
    else:
      logging.error('expected goma_stats to be an instance of GomaStats, '
                    'got %s', type(goma_stats))
  if goma_error:
    if goma_stats:
      logging.error('Only one of goma_error and goma_stats can be provided. '
                    'Got %s and %s.', goma_error, goma_stats)
    # NOTE(review): unlike the other arguments, goma_error is not validated
    # before the enum lookup; an unknown name presumably makes Value() raise
    # instead of logging an error -- confirm.
    build_event.goma_error = BuildEvent.GomaErrorType.Value(goma_error)
  if goma_crash_report_id:
    build_event.goma_crash_report_id = goma_crash_report_id
    if goma_error != 'GOMA_ERROR_CRASHED':
      logging.error('A crash report id (%s) was provided for GomaErrorType '
                    '(%s). This is only accepted for GOMA_ERROR_CRASHED '
                    'type.', goma_crash_report_id, goma_error)

  return event_wrapper
| 433 | |
| 434 | |
def send_build_event(event_type,
                     hostname,
                     build_name,
                     build_number=None,
                     build_scheduling_time=None,
                     step_name=None,
                     step_text=None,
                     step_number=None,
                     result=None,
                     extra_result_code=None,
                     timestamp_kind=None,
                     event_timestamp=None,
                     goma_stats=None,
                     goma_error=None,
                     goma_crash_report_id=None,
                     patch_url=None,
                     bbucket_id=None,
                     category=None):
  """Build a ChromeInfraEvent filled with a BuildEvent and send it.

  Args:
    event_type (string): any name of enum BuildEvent.BuildEventType.
      (listed in infra_libs.event_mon.monitoring.BUILD_EVENT_TYPES)
    hostname (string): fqdn of the machine that is running the build / step.
      aka the bot name.
    build_name (string): name of the builder.

  Keyword args:
    build_number (int): as the name says.
    build_scheduling_time (int): timestamp telling when the build was
      scheduled. This is required when build_number is provided to make it
      possible to distinguish two builds with the same build number.
    step_name (str): name of the step.
    step_text (str): text of the step.
    step_number (int): rank of the step in the build. This is mandatory
      if step_name is provided, because step_name is not enough to tell the
      order.
    result (string): any name of enum BuildEvent.BuildResult.
      (listed in infra_libs.event_mon.monitoring.BUILD_RESULTS)
    extra_result_code (string or list of): arbitrary strings intended to provide
      more fine-grained information about the result.
    timestamp_kind (string): forwarded as-is to get_build_event().
    event_timestamp (int or float): forwarded as-is to get_build_event().
    goma_stats (goma_stats_pb2.GomaStats): statistics output by the Goma proxy.
    goma_error (string): goma error type defined as GomaErrorType.
    goma_crash_report_id (string): id of goma crash report.
    patch_url (string): URL of the patch that triggered build
    bbucket_id (long): Buildbucket ID of the build.
    category (string): Build category, e.g. cq or git_cl_try.

  Returns:
    success (bool): False if some error happened.
  """
  build_event = get_build_event(
    event_type,
    hostname,
    build_name,
    build_number=build_number,
    build_scheduling_time=build_scheduling_time,
    step_name=step_name,
    step_text=step_text,
    step_number=step_number,
    result=result,
    extra_result_code=extra_result_code,
    timestamp_kind=timestamp_kind,
    event_timestamp=event_timestamp,
    goma_stats=goma_stats,
    goma_error=goma_error,
    goma_crash_report_id=goma_crash_report_id,
    patch_url=patch_url,
    bbucket_id=bbucket_id,
    category=category)
  return build_event.send()
| 504 | |
| 505 | |
def send_events(events):
  """Send a batch of events to the endpoint in a single push.

  Each event is converted with its log_event() method; the resulting tuple is
  handed to the configured router.

  Args:
    events (iterable of Event): events to send

  Returns:
    success (bool): True if data was successfully received by the endpoint.
  """
  push_event = config._router.push_event
  log_events = tuple(event.log_event() for event in events)
  return push_event(log_events)
| OLD | NEW |