Chromium Code Reviews| Index: go/src/infra/monitoring/dispatcher/dispatcher.go |
| diff --git a/go/src/infra/monitoring/dispatcher/dispatcher.go b/go/src/infra/monitoring/dispatcher/dispatcher.go |
| index 2d493356a1cee68ddf1a78b044d81bf4ba14b7f4..495a0e5c73d33f26a59e5e051f7c4d3844904a55 100644 |
| --- a/go/src/infra/monitoring/dispatcher/dispatcher.go |
| +++ b/go/src/infra/monitoring/dispatcher/dispatcher.go |
| @@ -27,6 +27,9 @@ import ( |
| "github.com/luci/luci-go/common/clock" |
| "github.com/luci/luci-go/common/logging" |
| "github.com/luci/luci-go/common/logging/gologger" |
| + "github.com/luci/luci-go/common/tsmon" |
| + "github.com/luci/luci-go/common/tsmon/field" |
| + "github.com/luci/luci-go/common/tsmon/metric" |
| "golang.org/x/net/context" |
| @@ -76,6 +79,14 @@ var ( |
| expvars = expvar.NewMap("dispatcher") |
| errLog = log.New(os.Stderr, "", log.Lshortfile|log.Ltime) |
| infoLog = log.New(os.Stdout, "", log.Lshortfile|log.Ltime) |
| + |
| + // tsmon metrics |
| + iterations = metric.NewCounter("alerts-dispatcher/iterations", |
|
Vadim Sh.
2016/07/28 23:08:37
nit: I think we use '_' as separators in /chrome/i…
seanmccullough1
2016/07/29 00:47:55
Done.
|
| + "Number of iterations of the main polling loop per run.", field.String("status")) |
|
Vadim Sh.
2016/07/28 23:08:37
'per run' is misleading, there'll be no (easy) way…
seanmccullough1
2016/07/29 00:47:55
Done.
|
| + postErrors = metric.NewCounter("alerts-dispatcher/post-errors", |
| + "Number of posting errors per run.") |
| + alertCount = metric.NewInt("alerts-dispatcher/alert-count", |
|
Vadim Sh.
2016/07/28 23:08:37
I think this should be counter too. Will be easier…
seanmccullough1
2016/07/29 00:47:55
Exactly the latter. The number of *new* alerts sin…
|
| + "Number of alerts generated in an iteration.", field.String("tree")) |
| ) |
| func init() { |
| @@ -198,6 +209,7 @@ func mainLoop(ctx context.Context, a *analyzer.Analyzer, trees map[string]bool, |
| } |
| } |
| alerts.Timestamp = messages.TimeToEpochTime(time.Now()) |
| + alertCount.Set(ctx, int64(len(alerts.Alerts)), tree) |
| if *alertsBaseURL == "" { |
| infoLog.Printf("No data_url provided. Writing to %s-alerts.json", tree) |
| @@ -221,6 +233,7 @@ func mainLoop(ctx context.Context, a *analyzer.Analyzer, trees map[string]bool, |
| err := w.PostAlerts(alerts) |
| if err != nil { |
| errLog.Printf("Couldn't post alerts: %v", err) |
| + postErrors.Add(ctx, 1) |
| errs <- err |
| return |
| } |
| @@ -251,7 +264,6 @@ func main() { |
| ctx := context.Background() |
| ctx = gologger.StdConfig.Use(ctx) |
| logging.SetLevel(ctx, logging.Debug) |
| - |
| authOptions := auth.Options{ |
| ServiceAccountJSONPath: *serviceAccountJSON, |
| Scopes: []string{ |
| @@ -276,6 +288,12 @@ func main() { |
| } |
| ctx = context.Background() |
| + tsFlags := tsmon.NewFlags() |
| + tsFlags.Target.TargetType = "task" |
| + tsFlags.Target.TaskServiceName = "alerts-dispatcher" |
| + tsFlags.Flush = "auto" |
|
Vadim Sh.
2016/07/28 23:08:37
"manual"? Since tsmon.Flush is used manually below
seanmccullough1
2016/07/29 00:47:55
Done.
|
| + tsmon.InitializeFromFlags(ctx, &tsFlags) |
| + |
| // Start serving expvars. |
| go func() { |
| listener, err := net.Listen("tcp", "127.0.0.1:0") |
| @@ -404,7 +422,14 @@ func main() { |
| // This is the polling/analysis/alert posting function, which will run in a loop until |
| // a timeout or max errors is reached. |
| f := func(ctx context.Context) error { |
| - return mainLoop(ctx, a, trees, transport) |
| + err := mainLoop(ctx, a, trees, transport) |
| + if err == nil { |
| + iterations.Add(ctx, 1, "success") |
| + } else { |
| + iterations.Add(ctx, 1, "failure") |
| + } |
| + tsmon.Flush(ctx) |
|
Vadim Sh.
2016/07/28 23:08:37
how often would this be called? I think tsmon does…
seanmccullough1
2016/07/29 00:47:55
Done.
|
| + return err |
| } |
| ctx, cancel := context.WithTimeout(ctx, duration) |
| @@ -412,6 +437,8 @@ func main() { |
| loopResults := looper.Run(ctx, f, cycle, *maxErrs, clock.GetSystemClock()) |
| + tsmon.Shutdown(ctx) |
| + |
| if !loopResults.Success { |
| errLog.Printf("Failed to run loop, %v errors", loopResults.Errs) |
| os.Exit(1) |