Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1077)

Side by Side Diff: alertserver/alerts.cfg

Issue 1692693006: Add alert for buildbot scheduler (Closed) Base URL: https://skia.googlesource.com/buildbot@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file defines alerts to be triggered by the server. 1 # This file defines alerts to be triggered by the server.
2 2
3 # 3 #
4 # AlertServer should tolerate no errors. 4 # AlertServer should tolerate no errors.
5 5
6 6
7 [[rule]] 7 [[rule]]
8 name = "AlertServer Errors" 8 name = "AlertServer Errors"
9 message = "The Error rate for the alertserver is too high, please check the logs." 9 message = "The Error rate for the alertserver is too high, please check the logs."
10 database = "graphite" 10 database = "graphite"
(...skipping 754 matching lines...) Expand 10 before | Expand all | Expand 10 after
765 Host info: https://status.skia.org/hosts?filter=%(buildslave)s""" 765 Host info: https://status.skia.org/hosts?filter=%(buildslave)s"""
766 database = "skmetrics" 766 database = "skmetrics"
767 query = "select mean(value) from \"buildbot.buildsteps.running-time\" where step != 'steps' AND step =~ /wait for device/ AND time > now() - 2m group by builder,master,number,buildslave,step" 767 query = "select mean(value) from \"buildbot.buildsteps.running-time\" where step != 'steps' AND step =~ /wait for device/ AND time > now() - 2m group by builder,master,number,buildslave,step"
768 category = "infra" 768 category = "infra"
769 condition = "x > 30 * 60 * 1000 * 1000 * 1000" 769 condition = "x > 30 * 60 * 1000 * 1000 * 1000"
770 actions = ["Email(infra-alerts@skia.org)"] 770 actions = ["Email(infra-alerts@skia.org)"]
771 auto-dismiss = true 771 auto-dismiss = true
772 nag = "1h" 772 nag = "1h"
773 773
774 # 774 #
775 # Buildbot Scheduler
776 #
777
778 [[rule]]
779 name = "Buildbot Scheduler Failing"
780 message = "The buildbot scheduler has failed to schedule builds for the last 10 minutes."
781 database = "skmetrics"
782 query = "select mean(value) from liveness where app='buildbot_scheduler' and host='skia-build-scheduler' and \"name\"='time-since-last-successful-scheduling' and time > now() - 5m group by app, host"
783 category = "infra"
784 condition = "x > 10 * 60 * 1000 * 1000 * 1000"
785 actions = ["Email(infra-alerts@skia.org)"]
786 auto-dismiss = true
787 nag = "1h"
788
789 #
775 # CTFE 790 # CTFE
776 # 791 #
777 792
778 [[rule]] 793 [[rule]]
779 name = "skia-ctfe logserverd" 794 name = "skia-ctfe logserverd"
780 message = "skia-ctfe logserverd is not responding at http://skia-ctfe:10115" 795 message = "skia-ctfe logserverd is not responding at http://skia-ctfe:10115"
781 database = "graphite" 796 database = "graphite"
782 query = "select mean(value) from /^prober$/ where type='failure' AND probename='skia_ctfe_logs' AND time > now() - 10m;" 797 query = "select mean(value) from /^prober$/ where type='failure' AND probename='skia_ctfe_logs' AND time > now() - 10m;"
783 category = "infra" 798 category = "infra"
784 condition = "x >= 1" 799 condition = "x >= 1"
(...skipping 482 matching lines...) Expand 10 before | Expand all | Expand 10 after
1267 [[rule]] 1282 [[rule]]
1268 name = "datahopper_internal Google3-Autoroller up-to-date" 1283 name = "datahopper_internal Google3-Autoroller up-to-date"
1269 message = "Commit has not been picked up by Google3-Autoroller for over two hours." 1284 message = "Commit has not been picked up by Google3-Autoroller for over two hours."
1270 database = "graphite" 1285 database = "graphite"
1271 query = "select mean(value) from /^ingest-build-webhook.oldest-untested-commit-age.value$/ where app='internal' AND host='skia-internal' AND codename='Google3-Autoroller' AND time > now() - 10m" 1286 query = "select mean(value) from /^ingest-build-webhook.oldest-untested-commit-age.value$/ where app='internal' AND host='skia-internal' AND codename='Google3-Autoroller' AND time > now() - 10m"
1272 category = "infra" 1287 category = "infra"
1273 condition = "x >= 7200" 1288 condition = "x >= 7200"
1274 actions = ["Email(infra-alerts@skia.org)"] 1289 actions = ["Email(infra-alerts@skia.org)"]
1275 auto-dismiss = true 1290 auto-dismiss = true
1276 nag = "1h" 1291 nag = "1h"
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698