Index: appengine/sheriff_o_matic/monitoring.py |
diff --git a/appengine/sheriff_o_matic/monitoring.py b/appengine/sheriff_o_matic/monitoring.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..81a11ccc0515291ddddf3a70e68d2365e685109f |
--- /dev/null |
+++ b/appengine/sheriff_o_matic/monitoring.py |
@@ -0,0 +1,83 @@ |
+#!/usr/bin/env python |
+# Copyright (c) 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Send system monitoring data to the timeseries monitoring API.""" |
+ |
+import argparse |
+import json |
+import logging |
+import os |
+import random |
+import sys |
+import time |
+import urllib2 |
+import webapp2 |
+ |
+import gae_ts_mon |
+import ts_alerts |
+ |
+from google.appengine.api import background_thread |
+from google.appengine.api import modules |
+ |
+ |
+# Per-endpoint access tallies. MonitoringHandler.get() sets one value for each |
+# ACCESS_COUNT key, tagged with a 'service' field naming the endpoint. |
+access_count = gae_ts_mon.CounterMetric('gae/access/count') |
agable
2015/08/10 23:04:38
Rather than having metrics for these endpoints ("a
|
+# Number of entities returned by ts_alerts.TSAlertsJSON.query_active(); set by |
+# MonitoringHandler.get() under the 'time-series-alerts' service field. |
+alerts_count = gae_ts_mon.GaugeMetric('gae/alerts/count') |
+ |
+ |
+class InitializeMonitoringHandler(webapp2.RequestHandler): |
+  """Request handler that sets up gae_ts_mon for the current instance.""" |
+ |
+  def get(self): |
+    """Initialize gae_ts_mon and answer with a 200 status. |
+ |
+    The current module instance id, converted to an int, is passed to |
+    gae_ts_mon as the task number. |
+    """ |
+    gae_ts_mon.initialize( |
+        job_name='sheriff-o-matic', |
+        number=int(modules.get_current_instance_id()), |
+        service_name='usage', |
+        endpoint='pubsub://chrome-infra-mon-pubsub/monacq') |
+    self.response.set_status(200, 'Initialized instance of ts_mon.') |
+ |
+ |
+class MonitoringHandler(webapp2.RequestHandler): |
+  """Tallies endpoint accesses and pushes them, with alert counts, to ts_mon. |
+ |
+  Routed from both '/monitoring' and '/monitoring/<key>'. |
+  """ |
+ |
+  # Access tally per known endpoint. This is a class attribute, so the counts |
+  # are shared by every handler instance created in this process. |
+  ACCESS_COUNT = {'ts-alerts': 0, |
agable
2015/08/10 23:04:38
Rather than keeping a dictionary here, you should
|
+                  'ts-alerts-history': 0, |
+                  'alerts': 0, |
+                  'alerts-history': 0, |
+                  'api/v1/alerts': 0} |
+ |
+  def get(self, key=None): |
+    """Called by cron jobs every 5 minutes to update alerts_count. |
+ |
+    Without a key, every ACCESS_COUNT tally and the number of active |
+    time-series alerts are set on their metrics and flushed. With a key, the |
+    matching tally is incremented instead and nothing is flushed. |
+ |
+    Args: |
+      key: Optional endpoint name captured from the URL. An unknown, |
+          non-empty key aborts the request with a 400. |
+    """ |
agable
2015/08/10 23:04:38
nit: """Called by cron... alerts_count."""
|
+    if key: |
+      if key not in self.ACCESS_COUNT: |
+        # abort() raises an HTTP exception, so text written to self.response |
+        # beforehand would not be sent; pass the explanation as the detail. |
+        self.abort(400, detail='Unknown key %s' % key) |
+      self.ACCESS_COUNT[key] += 1 |
+      self.response.write('POST SUCCESS: %s' % key) |
+      return |
+ |
+    for endpoint, count in self.ACCESS_COUNT.items(): |
+      access_count.set(count, {'service': endpoint}) |
+    time_series_count = len(ts_alerts.TSAlertsJSON.query_active().fetch()) |
+    alerts_count.set(time_series_count, {'service': 'time-series-alerts'}) |
+    gae_ts_mon.flush() |
+    self.response.write('Sheriff-o-matic metrics updated.') |
+ |
+  def post(self, key=None): |
+    """Increment the access tally for one endpoint. |
+ |
+    The endpoint name comes from the URL when the route captured one; |
+    otherwise it is read from the request (see _endpoint_from_request). |
+ |
+    Args: |
+      key: Optional endpoint name captured from the URL. The route |
+          '/monitoring/(.*)' passes it positionally, so the parameter must |
+          exist for POSTs to that route to be dispatched at all. |
+    """ |
+    logging.info('in post request') |
agable
2015/08/10 23:04:38
nit: remove some of the extraneous logging before
|
+    logging.info(self.request.body) |
+    if not key: |
+      key = self._endpoint_from_request() |
+    if key not in self.ACCESS_COUNT: |
+      # abort() raises, so the message has to be carried by the error itself. |
+      self.abort(400, detail='Unknown key %s' % key) |
+    self.ACCESS_COUNT[key] += 1 |
+    self.response.write('POST SUCCESS: ' + key) |
+ |
+  def _endpoint_from_request(self): |
+    """Return the 'endpoint' value sent with the request, or '' if absent. |
+ |
+    The raw body is a string and cannot be indexed by name, so it is first |
+    parsed as a JSON object; if that does not yield a string 'endpoint' |
+    member, the 'endpoint' request parameter is used instead. |
+    """ |
+    try: |
+      payload = json.loads(self.request.body) |
+    except ValueError: |
+      payload = None |
+    if isinstance(payload, dict): |
+      value = payload.get('endpoint') |
+      # Only strings can name an endpoint; other JSON types are ignored. |
+      if isinstance(value, basestring): |
+        return value |
+    return self.request.get('endpoint') |
+ |
+ |
+# Route table: '/_ah/start' goes to InitializeMonitoringHandler, while |
+# '/monitoring' and '/monitoring/<key>' go to MonitoringHandler (the group |
+# captured by '(.*)' is handed to the handler method as its argument). |
+# NOTE(review): debug=True is enabled here - confirm that is intended outside |
+# of development. |
+app = webapp2.WSGIApplication([ |
+  ('/_ah/start', InitializeMonitoringHandler), |
+  ('/monitoring', MonitoringHandler), |
+  ('/monitoring/(.*)', MonitoringHandler) |
+], debug=True) |
+ |