| OLD | NEW |
| 1 # This file defines alerts to be triggered by the server. | 1 # This file defines alerts to be triggered by the server. |
| 2 | 2 |
| 3 # | 3 # |
| 4 # AlertServer should tolerate no errors. | 4 # AlertServer should tolerate no errors. |
| 5 | 5 |
| 6 | 6 |
| 7 [[rule]] | 7 [[rule]] |
| 8 name = "AlertServer Errors" | 8 name = "AlertServer Errors" |
| 9 message = "The Error rate for the alertserver is too high, please check the logs." | 9 message = "The Error rate for the alertserver is too high, please check the logs." |
| 10 database = "graphite" | 10 database = "graphite" |
| (...skipping 754 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 765 Host info: https://status.skia.org/hosts?filter=%(buildslave)s""" | 765 Host info: https://status.skia.org/hosts?filter=%(buildslave)s""" |
| 766 database = "skmetrics" | 766 database = "skmetrics" |
| 767 query = "select mean(value) from \"buildbot.buildsteps.running-time\" where step != 'steps' AND step =~ /wait for device/ AND time > now() - 2m group by builder,master,number,buildslave,step" | 767 query = "select mean(value) from \"buildbot.buildsteps.running-time\" where step != 'steps' AND step =~ /wait for device/ AND time > now() - 2m group by builder,master,number,buildslave,step" |
| 768 category = "infra" | 768 category = "infra" |
| 769 condition = "x > 30 * 60 * 1000 * 1000 * 1000" | 769 condition = "x > 30 * 60 * 1000 * 1000 * 1000" |
| 770 actions = ["Email(infra-alerts@skia.org)"] | 770 actions = ["Email(infra-alerts@skia.org)"] |
| 771 auto-dismiss = true | 771 auto-dismiss = true |
| 772 nag = "1h" | 772 nag = "1h" |
| 773 | 773 |
| 774 # | 774 # |
| 775 # Buildbot Scheduler |
| 776 # |
| 777 |
| 778 [[rule]] |
| 779 name = "Buildbot Scheduler Failing" |
| 780 message = "The buildbot scheduler has failed to schedule builds for the last 10 minutes." |
| 781 database = "skmetrics" |
| 782 query = "select mean(value) from liveness where app='buildbot_scheduler' and host='skia-build-scheduler' and \"name\"='time-since-last-successful-scheduling' and time > now() - 5m group by app, host" |
| 783 category = "infra" |
| 784 condition = "x > 10 * 60 * 1000 * 1000 * 1000" |
| 785 actions = ["Email(infra-alerts@skia.org)"] |
| 786 auto-dismiss = true |
| 787 nag = "1h" |
| 788 |
| 789 # |
| 775 # CTFE | 790 # CTFE |
| 776 # | 791 # |
| 777 | 792 |
| 778 [[rule]] | 793 [[rule]] |
| 779 name = "skia-ctfe logserverd" | 794 name = "skia-ctfe logserverd" |
| 780 message = "skia-ctfe logserverd is not responding at http://skia-ctfe:10115" | 795 message = "skia-ctfe logserverd is not responding at http://skia-ctfe:10115" |
| 781 database = "graphite" | 796 database = "graphite" |
| 782 query = "select mean(value) from /^prober$/ where type='failure' AND probename='skia_ctfe_logs' AND time > now() - 10m;" | 797 query = "select mean(value) from /^prober$/ where type='failure' AND probename='skia_ctfe_logs' AND time > now() - 10m;" |
| 783 category = "infra" | 798 category = "infra" |
| 784 condition = "x >= 1" | 799 condition = "x >= 1" |
| (...skipping 482 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1267 [[rule]] | 1282 [[rule]] |
| 1268 name = "datahopper_internal Google3-Autoroller up-to-date" | 1283 name = "datahopper_internal Google3-Autoroller up-to-date" |
| 1269 message = "Commit has not been picked up by Google3-Autoroller for over two hours." | 1284 message = "Commit has not been picked up by Google3-Autoroller for over two hours." |
| 1270 database = "graphite" | 1285 database = "graphite" |
| 1271 query = "select mean(value) from /^ingest-build-webhook.oldest-untested-commit-age.value$/ where app='internal' AND host='skia-internal' AND codename='Google3-Autoroller' AND time > now() - 10m" | 1286 query = "select mean(value) from /^ingest-build-webhook.oldest-untested-commit-age.value$/ where app='internal' AND host='skia-internal' AND codename='Google3-Autoroller' AND time > now() - 10m" |
| 1272 category = "infra" | 1287 category = "infra" |
| 1273 condition = "x >= 7200" | 1288 condition = "x >= 7200" |
| 1274 actions = ["Email(infra-alerts@skia.org)"] | 1289 actions = ["Email(infra-alerts@skia.org)"] |
| 1275 auto-dismiss = true | 1290 auto-dismiss = true |
| 1276 nag = "1h" | 1291 nag = "1h" |
| OLD | NEW |