Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Dispatcher usage: | 5 // Dispatcher usage: |
| 6 // go run infra/monitoring/dispatcher | 6 // go run infra/monitoring/dispatcher |
| 7 // Expects gatekeeper.json to be in the current directory. | 7 // Expects gatekeeper.json to be in the current directory. |
| 8 | 8 |
| 9 package main | 9 package main |
| 10 | 10 |
| 11 import ( | 11 import ( |
| 12 "encoding/json" | 12 "encoding/json" |
| 13 "expvar" | 13 "expvar" |
| 14 "flag" | 14 "flag" |
| 15 "fmt" | 15 "fmt" |
| 16 "io/ioutil" | 16 "io/ioutil" |
| 17 "log" | 17 "log" |
| 18 "net" | 18 "net" |
| 19 "net/http" | 19 "net/http" |
| 20 "net/url" | 20 "net/url" |
| 21 "os" | 21 "os" |
| 22 "sort" | 22 "sort" |
| 23 "strings" | 23 "strings" |
| 24 "time" | 24 "time" |
| 25 | 25 |
| 26 "github.com/luci/luci-go/common/auth" | 26 "github.com/luci/luci-go/common/auth" |
| 27 "github.com/luci/luci-go/common/clock" | 27 "github.com/luci/luci-go/common/clock" |
| 28 "github.com/luci/luci-go/common/logging" | 28 "github.com/luci/luci-go/common/logging" |
| 29 "github.com/luci/luci-go/common/logging/gologger" | 29 "github.com/luci/luci-go/common/logging/gologger" |
| 30 "github.com/luci/luci-go/common/tsmon" | |
| 31 "github.com/luci/luci-go/common/tsmon/field" | |
| 32 "github.com/luci/luci-go/common/tsmon/metric" | |
| 30 | 33 |
| 31 "golang.org/x/net/context" | 34 "golang.org/x/net/context" |
| 32 | 35 |
| 33 "infra/monitoring/analyzer" | 36 "infra/monitoring/analyzer" |
| 34 "infra/monitoring/client" | 37 "infra/monitoring/client" |
| 35 "infra/monitoring/looper" | 38 "infra/monitoring/looper" |
| 36 "infra/monitoring/messages" | 39 "infra/monitoring/messages" |
| 37 ) | 40 ) |
| 38 | 41 |
| 39 type stringSlice []string | 42 type stringSlice []string |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 69 duration, cycle time.Duration | 72 duration, cycle time.Duration |
| 70 | 73 |
| 71 // gk is the gatekeeper config. | 74 // gk is the gatekeeper config. |
| 72 gks = []*messages.GatekeeperConfig{} | 75 gks = []*messages.GatekeeperConfig{} |
| 73 // gkt is the gatekeeper trees config. | 76 // gkt is the gatekeeper trees config. |
| 74 gkts = map[string][]messages.TreeMasterConfig{} | 77 gkts = map[string][]messages.TreeMasterConfig{} |
| 75 filteredFailures = uint64(0) | 78 filteredFailures = uint64(0) |
| 76 expvars = expvar.NewMap("dispatcher") | 79 expvars = expvar.NewMap("dispatcher") |
| 77 errLog = log.New(os.Stderr, "", log.Lshortfile|log.Ltime) | 80 errLog = log.New(os.Stderr, "", log.Lshortfile|log.Ltime) |
| 78 infoLog = log.New(os.Stdout, "", log.Lshortfile|log.Ltime) | 81 infoLog = log.New(os.Stdout, "", log.Lshortfile|log.Ltime) |
| 82 | |
| 83 // tsmon metrics | |
| 84 iterations = metric.NewCounter("alerts-dispatcher/iterations", | |
|
Vadim Sh.
2016/07/28 23:08:37
nit: I think we use '_' as separators in /chrome/i
seanmccullough1
2016/07/29 00:47:55
Done.
| |
| 85 "Number if iterations of the main polling loop per run.", field.String("status")) | |
|
Vadim Sh.
2016/07/28 23:08:37
'per run' is misleading, there'll be no (easy) way
seanmccullough1
2016/07/29 00:47:55
Done.
| |
| 86 postErrors = metric.NewCounter("alerts-dispatcher/post-errors", | |
| 87 "Number of posting errors per run.") | |
| 88 alertCount = metric.NewInt("alerts-dispatcher/alert-count", | |
|
Vadim Sh.
2016/07/28 23:08:37
I think this should be counter too. Will be easier
seanmccullough1
2016/07/29 00:47:55
Exactly the latter. The number of *new* alerts sin
| |
| 89 "Number of alerts generated in an iteration.", field.String("tree")) | |
| 79 ) | 90 ) |
| 80 | 91 |
| 81 func init() { | 92 func init() { |
| 82 flag.Usage = func() { | 93 flag.Usage = func() { |
| 83 fmt.Printf("By default runs a single check, saves any alerts to ./alerts.json and exits.") | 94 fmt.Printf("By default runs a single check, saves any alerts to ./alerts.json and exits.") |
| 84 flag.PrintDefaults() | 95 flag.PrintDefaults() |
| 85 } | 96 } |
| 86 } | 97 } |
| 87 | 98 |
| 88 func analyzeBuildExtract(ctx context.Context, a *analyzer.Analyzer, tree string, masterURL *messages.MasterLocation, b *messages.BuildExtract) []messages.Alert { | 99 func analyzeBuildExtract(ctx context.Context, a *analyzer.Analyzer, tree string, masterURL *messages.MasterLocation, b *messages.BuildExtract) []messages.Alert { |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 191 errLog.Printf("Couldn't get revision summaries: %v", err) | 202 errLog.Printf("Couldn't get revision summaries: %v", err) |
| 192 continue | 203 continue |
| 193 } | 204 } |
| 194 for _, rev := range revs { | 205 for _, rev := range revs { |
| 195 alerts.RevisionSummaries[rev.GitHash] = rev | 206 alerts.RevisionSummaries[rev.GitHash] = rev |
| 196 } | 207 } |
| 197 } | 208 } |
| 198 } | 209 } |
| 199 } | 210 } |
| 200 alerts.Timestamp = messages.TimeToEpochTime(time.Now()) | 211 alerts.Timestamp = messages.TimeToEpochTime(time.Now()) |
| 212 alertCount.Set(ctx, int64(len(alerts.Alerts)), tree) | |
| 201 | 213 |
| 202 if *alertsBaseURL == "" { | 214 if *alertsBaseURL == "" { |
| 203 infoLog.Printf("No data_url provided. Writing to %s-alerts.json", tree) | 215 infoLog.Printf("No data_url provided. Writing to %s-alerts.json", tree) |
| 204 | 216 |
| 205 abytes, err := json.MarshalIndent(alerts, "", "\t") | 217 abytes, err := json.MarshalIndent(alerts, "", "\t") |
| 206 if err != nil { | 218 if err != nil { |
| 207 errLog.Printf("Couldn't marshal alerts json: %v", err) | 219 errLog.Printf("Couldn't marshal alerts json: %v", err) |
| 208 errs <- err | 220 errs <- err |
| 209 return | 221 return |
| 210 } | 222 } |
| 211 | 223 |
| 212 if err := ioutil.WriteFile(fmt.Sprintf("%s-alerts.json", tree), abytes, 0644); err != nil { | 224 if err := ioutil.WriteFile(fmt.Sprintf("%s-alerts.json", tree), abytes, 0644); err != nil { |
| 213 errLog.Printf("Couldn't write to alerts.json: %v", err) | 225 errLog.Printf("Couldn't write to alerts.json: %v", err) |
| 214 errs <- err | 226 errs <- err |
| 215 return | 227 return |
| 216 } | 228 } |
| 217 } else { | 229 } else { |
| 218 alertsURL := fmt.Sprintf("%s/%s", *alertsBaseURL, tree) | 230 alertsURL := fmt.Sprintf("%s/%s", *alertsBaseURL, tree) |
| 219 w := client.NewWriter(alertsURL, transport) | 231 w := client.NewWriter(alertsURL, transport) |
| 220 infoLog.Printf("Posting alerts to %s", alertsURL) | 232 infoLog.Printf("Posting alerts to %s", alertsURL) |
| 221 err := w.PostAlerts(alerts) | 233 err := w.PostAlerts(alerts) |
| 222 if err != nil { | 234 if err != nil { |
| 223 errLog.Printf("Couldn't post alerts: %v", err) | 235 errLog.Printf("Couldn't post alerts: %v", err) |
| 236 postErrors.Add(ctx, 1) | |
| 224 errs <- err | 237 errs <- err |
| 225 return | 238 return |
| 226 } | 239 } |
| 227 } | 240 } |
| 228 | 241 |
| 229 infoLog.Printf("Filtered failures: %v", filteredFailures) | 242 infoLog.Printf("Filtered failures: %v", filteredFailures) |
| 230 done <- nil | 243 done <- nil |
| 231 }() | 244 }() |
| 232 } | 245 } |
| 233 | 246 |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 244 | 257 |
| 245 func main() { | 258 func main() { |
| 246 flag.Var(&gatekeeperJSON, "gatekeeper", "Location of gatekeeper json file. Can have multiple comma separated values.") | 259 flag.Var(&gatekeeperJSON, "gatekeeper", "Location of gatekeeper json file. Can have multiple comma separated values.") |
| 247 flag.Var(&gatekeeperTreesJSON, "gatekeeper-trees", "Location of gatekeeper tree json file. Can have multiple comma separated values.") | 260 flag.Var(&gatekeeperTreesJSON, "gatekeeper-trees", "Location of gatekeeper tree json file. Can have multiple comma separated values.") |
| 248 | 261 |
| 249 flag.Parse() | 262 flag.Parse() |
| 250 | 263 |
| 251 ctx := context.Background() | 264 ctx := context.Background() |
| 252 ctx = gologger.StdConfig.Use(ctx) | 265 ctx = gologger.StdConfig.Use(ctx) |
| 253 logging.SetLevel(ctx, logging.Debug) | 266 logging.SetLevel(ctx, logging.Debug) |
| 254 | |
| 255 authOptions := auth.Options{ | 267 authOptions := auth.Options{ |
| 256 ServiceAccountJSONPath: *serviceAccountJSON, | 268 ServiceAccountJSONPath: *serviceAccountJSON, |
| 257 Scopes: []string{ | 269 Scopes: []string{ |
| 258 auth.OAuthScopeEmail, | 270 auth.OAuthScopeEmail, |
| 259 "https://www.googleapis.com/auth/projecthosting", | 271 "https://www.googleapis.com/auth/projecthosting", |
| 260 }, | 272 }, |
| 261 Method: auth.ServiceAccountMethod, | 273 Method: auth.ServiceAccountMethod, |
| 262 } | 274 } |
| 263 | 275 |
| 264 mode := auth.SilentLogin | 276 mode := auth.SilentLogin |
| 265 if *login { | 277 if *login { |
| 266 mode = auth.InteractiveLogin | 278 mode = auth.InteractiveLogin |
| 267 } | 279 } |
| 268 | 280 |
| 269 transport, err := auth.NewAuthenticator(ctx, mode, authOptions).Transport() | 281 transport, err := auth.NewAuthenticator(ctx, mode, authOptions).Transport() |
| 270 if err != nil { | 282 if err != nil { |
| 271 errLog.Printf("AuthenticatedTransport: %v", err) | 283 errLog.Printf("AuthenticatedTransport: %v", err) |
| 272 if !*login { | 284 if !*login { |
| 273 errLog.Printf("Consider re-running with -login") | 285 errLog.Printf("Consider re-running with -login") |
| 274 } | 286 } |
| 275 os.Exit(1) | 287 os.Exit(1) |
| 276 } | 288 } |
| 277 ctx = context.Background() | 289 ctx = context.Background() |
| 278 | 290 |
| 291 tsFlags := tsmon.NewFlags() | |
| 292 tsFlags.Target.TargetType = "task" | |
| 293 tsFlags.Target.TaskServiceName = "alerts-dispatcher" | |
| 294 tsFlags.Flush = "auto" | |
|
Vadim Sh.
2016/07/28 23:08:37
"manual"? Since tsmon.Flush is used manually below
seanmccullough1
2016/07/29 00:47:55
Done.
| |
| 295 tsmon.InitializeFromFlags(ctx, &tsFlags) | |
| 296 | |
| 279 // Start serving expvars. | 297 // Start serving expvars. |
| 280 go func() { | 298 go func() { |
| 281 listener, err := net.Listen("tcp", "127.0.0.1:0") | 299 listener, err := net.Listen("tcp", "127.0.0.1:0") |
| 282 if err != nil { | 300 if err != nil { |
| 283 errLog.Printf("Listen: %s", err) | 301 errLog.Printf("Listen: %s", err) |
| 284 os.Exit(1) | 302 os.Exit(1) |
| 285 } | 303 } |
| 286 | 304 |
| 287 infoLog.Printf("expvars listening on %v", listener.Addr()) | 305 infoLog.Printf("expvars listening on %v", listener.Addr()) |
| 288 | 306 |
| (...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 397 for tree := range trees { | 415 for tree := range trees { |
| 398 if _, ok := gkts[tree]; !ok { | 416 if _, ok := gkts[tree]; !ok { |
| 399 errLog.Printf("Unrecognized tree name: %s", tree) | 417 errLog.Printf("Unrecognized tree name: %s", tree) |
| 400 os.Exit(1) | 418 os.Exit(1) |
| 401 } | 419 } |
| 402 } | 420 } |
| 403 | 421 |
| 404 // This is the polling/analysis/alert posting function, which will run in a loop until | 422 // This is the polling/analysis/alert posting function, which will run in a loop until |
| 405 // a timeout or max errors is reached. | 423 // a timeout or max errors is reached. |
| 406 f := func(ctx context.Context) error { | 424 f := func(ctx context.Context) error { |
| 407 » » return mainLoop(ctx, a, trees, transport) | 425 » » err := mainLoop(ctx, a, trees, transport) |
| 426 » » if err == nil { | |
| 427 » » » iterations.Add(ctx, 1, "success") | |
| 428 » » } else { | |
| 429 » » » iterations.Add(ctx, 1, "failure") | |
| 430 » » } | |
| 431 » » tsmon.Flush(ctx) | |
|
Vadim Sh.
2016/07/28 23:08:37
how often would this be called? I think tsmon does
seanmccullough1
2016/07/29 00:47:55
Done.
| |
| 432 » » return err | |
| 408 } | 433 } |
| 409 | 434 |
| 410 ctx, cancel := context.WithTimeout(ctx, duration) | 435 ctx, cancel := context.WithTimeout(ctx, duration) |
| 411 defer cancel() | 436 defer cancel() |
| 412 | 437 |
| 413 loopResults := looper.Run(ctx, f, cycle, *maxErrs, clock.GetSystemClock()) | 438 loopResults := looper.Run(ctx, f, cycle, *maxErrs, clock.GetSystemClock()) |
| 414 | 439 |
| 440 tsmon.Shutdown(ctx) | |
| 441 | |
| 415 if !loopResults.Success { | 442 if !loopResults.Success { |
| 416 errLog.Printf("Failed to run loop, %v errors", loopResults.Errs) | 443 errLog.Printf("Failed to run loop, %v errors", loopResults.Errs) |
| 417 os.Exit(1) | 444 os.Exit(1) |
| 418 } | 445 } |
| 419 } | 446 } |
| OLD | NEW |