Chromium Code Reviews| Index: appengine/swarming/event_mon_metrics.py |
| diff --git a/appengine/swarming/event_mon_metrics.py b/appengine/swarming/event_mon_metrics.py |
| index ba5a3b68e062bcb721f14bfe8ad4380a48e24644..1b1739983ede2059d59eefd2c89a08fa68d5d4aa 100644 |
| --- a/appengine/swarming/event_mon_metrics.py |
| +++ b/appengine/swarming/event_mon_metrics.py |
| @@ -3,28 +3,160 @@ |
| # that can be found in the LICENSE file. |
| import logging |
| +import time |
| import gae_event_mon |
| +from server import task_result |
| + |
| + |
| +DIMENSIONS = ( |
| + ('cores', int), |
| + ('cpu', unicode), |
| + ('device_os', unicode), |
| + ('device_type', unicode), |
| + ('gpu', unicode), |
| + ('hidpi', unicode), |
| + ('machine_type', unicode), |
| + ('os', unicode), |
| + ('pool', unicode), |
| + ('xcode_version', unicode), |
| + ('zone', unicode), |
| +) |
| + |
| + |
| +def _to_timestamp(dt): |
| + return int(time.mktime(dt.timetuple())) |
| + |
| + |
| +def _files_ref_to_proto(files_ref, proto): |
|
M-A Ruel
2016/12/06 15:40:09
You think it is useful information to stream? I do
Paweł Hajdan Jr.
2016/12/07 20:27:56
I think we already had that discussion. It's part
|
| + if files_ref.isolated: |
| + proto.isolated = files_ref.isolated |
| + if files_ref.isolatedserver: |
| + proto.isolatedserver = files_ref.isolatedserver |
| + if files_ref.namespace: |
| + proto.namespace = files_ref.namespace |
| + |
| + |
| +def _cipd_package_to_proto(cipd_package, package_proto): |
| + if cipd_package.package_name: |
| + package_proto.package_name = cipd_package.package_name |
| + if cipd_package.version: |
| + package_proto.version = cipd_package.version |
| + if package_proto.path: |
| + package_proto.path = cipd_package.path |
| + |
| + |
| +def _task_summary_to_proto(summary, event): |
| + event.proto.swarming_task_event.id = summary.task_id |
| + |
| + request_proto = event.proto.swarming_task_event.request |
| + if summary.request.parent_task_id: |
| + request_proto.parent_task_id = summary.request.parent_task_id |
| + request_proto.name = summary.request.name |
| + request_proto.created_ts = _to_timestamp(summary.request.created_ts) |
| + request_proto.expiration_ts = _to_timestamp(summary.request.expiration_ts) |
| + request_proto.priority = summary.request.priority |
| + if summary.request.pubsub_topic: |
| + request_proto.pubsub_topic = summary.request.pubsub_topic |
| + |
| + task_properties = summary.request.properties |
| + properties_proto = request_proto.properties |
| + |
| + if task_properties.inputs_ref: |
| + _files_ref_to_proto(task_properties.inputs_ref, properties_proto.inputs_ref) |
| + |
| + if task_properties.cipd_input: |
| + cipd_proto = properties_proto.cipd_input |
| + cipd_proto.server = task_properties.cipd_input.server |
| + |
| + _cipd_package_to_proto(task_properties.cipd_input.client_package, |
| + cipd_proto.client_package) |
| + for package in task_properties.cipd_input.packages: |
| + package_proto = cipd_proto.packages.add() |
| + _cipd_package_to_proto(package, package_proto) |
| + |
| + for d, t in DIMENSIONS: |
| + if d in task_properties.dimensions: |
| + getattr(properties_proto.dimensions, d).append( |
| + t(task_properties.dimensions[d])) |
| + |
| + if task_properties.execution_timeout_secs: |
|
M-A Ruel
2016/12/06 15:40:09
it is always set; no need for condition
|
| + properties_proto.execution_timeout_s = task_properties.execution_timeout_secs |
| + if task_properties.grace_period_secs: |
|
M-A Ruel
2016/12/06 15:40:09
same here, and io_timeout
Paweł Hajdan Jr.
2016/12/07 20:27:56
I'm getting exceptions if I remove. For _some_ of
M-A Ruel
2016/12/07 21:47:53
Humm that's weird. ok.
|
| + properties_proto.grace_period_s = task_properties.grace_period_secs |
| + if task_properties.io_timeout_secs: |
| + properties_proto.io_timeout_s = task_properties.io_timeout_secs |
| + properties_proto.idempotent = task_properties.idempotent |
| + |
| + state_enum = event.proto.swarming_task_event.State.DESCRIPTOR.values_by_name |
| + if summary.state == task_result.State.COMPLETED: |
| + event.proto.swarming_task_event.state = state_enum['COMPLETED'].number |
|
M-A Ruel
2016/12/06 15:40:09
maybe use a string representation instead?
Paweł Hajdan Jr.
2016/12/07 20:27:56
Do you have a more specific suggestion? Please not
M-A Ruel
2016/12/07 21:47:53
Ah, ignore me then
|
| + elif summary.state == task_result.State.CANCELED: |
| + event.proto.swarming_task_event.state = state_enum['CANCELED'].number |
| + elif summary.state == task_result.State.BOT_DIED: |
| + event.proto.swarming_task_event.state = state_enum['BOT_DIED'].number |
| + elif summary.state == task_result.State.TIMED_OUT: |
| + event.proto.swarming_task_event.state = state_enum['TIMED_OUT'].number |
| + elif summary.state == task_result.State.EXPIRED: |
| + event.proto.swarming_task_event.state = state_enum['EXPIRED'].number |
| + else: |
| + logging.error('Unhandled task state %r', summary.state) |
| + |
| + event.proto.swarming_task_event.bot_id = summary.bot_id |
| + event.proto.swarming_task_event.bot_version = summary.bot_version |
| + |
| + for d, t in DIMENSIONS: |
| + for v in summary.bot_dimensions.get(d, []): |
| + getattr(event.proto.swarming_task_event.bot_dimensions, d).append(t(v)) |
| + |
| + for v in summary.server_versions: |
| + event.proto.swarming_task_event.server_versions.append(v) |
| + |
| + event.proto.swarming_task_event.internal_failure = summary.internal_failure |
| + event.proto.swarming_task_event.exit_code = summary.exit_code |
| + if summary.started_ts: |
| + event.proto.swarming_task_event.started_ts = _to_timestamp( |
| + summary.started_ts) |
| + if summary.completed_ts: |
| + event.proto.swarming_task_event.completed_ts = _to_timestamp( |
| + summary.completed_ts) |
| + if summary.abandoned_ts: |
| + event.proto.swarming_task_event.abandoned_ts = _to_timestamp( |
| + summary.abandoned_ts) |
| + |
| + for task_id in summary.children_task_ids: |
| + event.proto.swarming_task_event.children_task_ids.append(task_id) |
| + |
| + if summary.outputs_ref: |
| + _files_ref_to_proto( |
| + summary.outputs_ref, event.proto.swarming_task_event.outputs_ref) |
| + |
| + event.proto.swarming_task_event.cost_usd = summary.cost_usd |
| + if summary.cost_saved_usd: |
| + event.proto.swarming_task_event.cost_saved_usd = summary.cost_saved_usd |
| + if summary.deduped_from: |
| + event.proto.swarming_task_event.deduped_from = summary.deduped_from |
| + event.proto.swarming_task_event.try_number = summary.try_number |
| + |
| def initialize(): |
| gae_event_mon.initialize('swarming') |
| -def send_task_event(task_result_summary): |
| +def send_task_event(summary): |
| """Sends an event_mon event about a swarming task. |
| Currently implemented as sending a HTTP request. |
| Args: |
| - task_result_summary: TaskResultSummary object. |
| + summary: TaskResultSummary object. |
| """ |
| - event = gae_event_mon.Event('POINT') |
| - event.proto.swarming_task_event.id = task_result_summary.task_id |
| - |
| # Isolate rest of the app from monitoring pipeline issues. They should |
| # not cause outage of swarming. |
| try: |
| + event = gae_event_mon.Event('POINT') |
| + _task_summary_to_proto(summary, event) |
| event.send() |
| except Exception: |
| logging.exception('Caught exception while sending event') |