Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Side by Side Diff: alertserver/alerts.cfg

Issue 1156253004: Fix URLs and queries in prober and alerts (Closed) Base URL: https://skia.googlesource.com/buildbot@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | prober/probers.json » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file defines alerts to be triggered by the server. 1 # This file defines alerts to be triggered by the server.
2 2
3 # 3 #
4 # SkiaPerf and SkiaGold 4 # SkiaPerf and SkiaGold
5 # 5 #
6 6
7 [[rule]] 7 [[rule]]
8 name = "Perf Alerts" 8 name = "Perf Alerts"
9 message = "At least one perf alert has been found. Please visit https://perf.skia.org/alerts/ to triage." 9 message = "At least one perf alert has been found. Please visit https://perf.skia.org/alerts/ to triage."
10 query = "select value from /skiaperf.skia-perf.alerting.new.value/ limit 1" 10 query = "select value from /skiaperf.skia-perf.alerting.new.value/ limit 1"
11 category = "Perf" 11 category = "Perf"
12 condition = "x > 0" 12 condition = "x > 0"
13 actions = ["Email(alerts@skia.org)"] 13 actions = ["Email(alerts@skia.org)"]
14 auto-dismiss = true 14 auto-dismiss = true
15 nag = "24h" 15 nag = "24h"
16 16
17 [[rule]] 17 [[rule]]
18 name = "Gold Alert (GM)" 18 name = "Gold Alert (GM)"
19 message = "At least one untriaged GM has been found. Please visit https://gold.skia.org/ to triage." 19 message = "At least one untriaged GM has been found. Please visit https://gold.skia.org/ to triage."
20 query = "select value from /skiacorrectness.skia-gold-prod.status.untriaged.by_corpus.gm.value/ limit 1" 20 query = "select value from /^skiacorrectness.skia-gold-prod.status.untriaged.by_corpus.gm.value$/ limit 1"
21 category = "Gold" 21 category = "Gold"
22 condition = "x > 0" 22 condition = "x > 0"
23 actions = ["Email(alerts@skia.org)"] 23 actions = ["Email(alerts@skia.org)"]
24 auto-dismiss = true 24 auto-dismiss = true
25 nag = "24h" 25 nag = "24h"
26 26
27 [[rule]] 27 [[rule]]
28 name = "Expired Ingores (Gold)" 28 name = "Expired Ingores (Gold)"
29 message = "At least one expired ignore rule has been found. Please visit https://gold.skia.org/2/ignores to delete or extend." 29 message = "At least one expired ignore rule has been found. Please visit https://gold.skia.org/ignores to delete or extend."
30 query = "select value from /skiacorrectness.skia-gold-prod.num-expired-ignore-rules.value/ limit 1" 30 query = "select value from /^skiacorrectness.skia-gold-prod.num-expired-ignore-rules.value$/ limit 1"
31 category = "Gold" 31 category = "Gold"
32 condition = "x > 0" 32 condition = "x > 0"
33 actions = ["Email(alerts@skia.org)"] 33 actions = ["Email(alerts@skia.org)"]
34 auto-dismiss = true 34 auto-dismiss = true
35 nag = "24h" 35 nag = "24h"
36 36
37 [[rule]] 37 [[rule]]
38 name = "Ingestion Failure (Perf)" 38 name = "Ingestion Failure (Perf)"
39 message = "At least two rounds of perf ingestion have failed back to back." 39 message = "At least two rounds of perf ingestion have failed back to back."
40 query = "select mean(value) from /ingest.skia-perf.ingester.nano-ingest.gauge.time-since-last-successful-update.value/ where time > now() - 10m" 40 query = "select mean(value) from /ingest.skia-perf.ingester.nano-ingest.gauge.time-since-last-successful-update.value/ where time > now() - 10m"
(...skipping 19 matching lines...) Expand all
60 query = "select mean(value) from /ingest.skia-gold-prod.ingester.gold-ingest.gauge.time-since-last-successful-update.value/ where time > now() - 10m" 60 query = "select mean(value) from /ingest.skia-gold-prod.ingester.gold-ingest.gauge.time-since-last-successful-update.value/ where time > now() - 10m"
61 category = "infra" 61 category = "infra"
62 condition = "x >= 750" 62 condition = "x >= 750"
63 actions = ["Email(infra-alerts@skia.org)"] 63 actions = ["Email(infra-alerts@skia.org)"]
64 auto-dismiss = true 64 auto-dismiss = true
65 nag = "1h" 65 nag = "1h"
66 66
67 [[rule]] 67 [[rule]]
68 name = "Ingore Monitoring Failure (Gold)" 68 name = "Ingore Monitoring Failure (Gold)"
69 message = "At least two rounds of monitoring for expired ignore rules have failed back to back." 69 message = "At least two rounds of monitoring for expired ignore rules have failed back to back."
70 query = "select mean(value) from /skiacorrectness.skia-gold-prod.expired-ignore-rules-monitoring.time-since-last-successful-update.value/ where time > now() - 10m" 70 query = "select mean(value) from /^skiacorrectness.skia-gold-prod.expired-ignore-rules-monitoring.time-since-last-successful-update.value$/ where time > now() - 10m"
71 category = "infra" 71 category = "infra"
72 condition = "x >= 200" 72 condition = "x >= 200"
73 actions = ["Email(infra-alerts@skia.org)"] 73 actions = ["Email(infra-alerts@skia.org)"]
74 auto-dismiss = true 74 auto-dismiss = true
75 nag = "1h" 75 nag = "1h"
76 76
77 [[rule]]
78 name = "Gold Hash Prober"
79 message = "The list of currently considered image digests is not accessible at https://gold.skia.org/_/hashes"
80 query = "select mean(value) from /^prober.skiagold_hashes.failure.value$/ where time > now() - 10m;"
81 category = "infra"
82 condition = "x >= 1"
83 actions = ["Email(infra-alerts@skia.org)"]
84 auto-dismiss = false
85 nag = "1h"
86
77 # 87 #
78 # SkFiddle 88 # SkFiddle
79 # 89 #
80 90
81 [[rule]] 91 [[rule]]
82 name = "Skia Fiddle Prober (main page)" 92 name = "Skia Fiddle Prober (main page)"
83 message = "The main page at http://skfiddle.com has failed." 93 message = "The main page at http://skfiddle.com has failed."
84 query = "select mean(value) from /prober.skfiddle.failure.value/ where time > now() - 10m;" 94 query = "select mean(value) from /prober.skfiddle.failure.value/ where time > now() - 10m;"
85 category = "infra" 95 category = "infra"
86 condition = "x >= 1" 96 condition = "x >= 1"
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
371 381
372 [[rule]] 382 [[rule]]
373 name = "Probe Failure (issue-tracker)" 383 name = "Probe Failure (issue-tracker)"
374 message = "Ingesting issue tracker issued has failed to run in at least 30 minutes." 384 message = "Ingesting issue tracker issued has failed to run in at least 30 minutes."
375 query = "select mean(value) from /probeserver.skia-monitoring.issue-tracker.time-since-last-successful-update.value/ where time > now() - 10m" 385 query = "select mean(value) from /probeserver.skia-monitoring.issue-tracker.time-since-last-successful-update.value/ where time > now() - 10m"
376 category = "infra" 386 category = "infra"
377 condition = "x >= 1800" 387 condition = "x >= 1800"
378 actions = ["Email(infra-alerts@skia.org)"] 388 actions = ["Email(infra-alerts@skia.org)"]
379 auto-dismiss = true 389 auto-dismiss = true
380 nag = "1h" 390 nag = "1h"
OLDNEW
« no previous file with comments | « no previous file | prober/probers.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698