Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(160)

Side by Side Diff: go/src/infra/monitoring/dispatcher/dispatcher.go

Issue 2190163002: [som] add tsmon to alerts-dispatcher (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: count alerts per-tree Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Dispatcher usage: 5 // Dispatcher usage:
6 // go run infra/monitoring/dispatcher 6 // go run infra/monitoring/dispatcher
7 // Expects gatekeeper.json to be in the current directory. 7 // Expects gatekeeper.json to be in the current directory.
8 8
9 package main 9 package main
10 10
11 import ( 11 import (
12 "encoding/json" 12 "encoding/json"
13 "expvar" 13 "expvar"
14 "flag" 14 "flag"
15 "fmt" 15 "fmt"
16 "io/ioutil" 16 "io/ioutil"
17 "log" 17 "log"
18 "net" 18 "net"
19 "net/http" 19 "net/http"
20 "net/url" 20 "net/url"
21 "os" 21 "os"
22 "sort" 22 "sort"
23 "strings" 23 "strings"
24 "time" 24 "time"
25 25
26 "github.com/luci/luci-go/common/auth" 26 "github.com/luci/luci-go/common/auth"
27 "github.com/luci/luci-go/common/clock" 27 "github.com/luci/luci-go/common/clock"
28 "github.com/luci/luci-go/common/logging" 28 "github.com/luci/luci-go/common/logging"
29 "github.com/luci/luci-go/common/logging/gologger" 29 "github.com/luci/luci-go/common/logging/gologger"
30 "github.com/luci/luci-go/common/tsmon"
31 "github.com/luci/luci-go/common/tsmon/field"
32 "github.com/luci/luci-go/common/tsmon/metric"
30 33
31 "golang.org/x/net/context" 34 "golang.org/x/net/context"
32 35
33 "infra/monitoring/analyzer" 36 "infra/monitoring/analyzer"
34 "infra/monitoring/client" 37 "infra/monitoring/client"
35 "infra/monitoring/looper" 38 "infra/monitoring/looper"
36 "infra/monitoring/messages" 39 "infra/monitoring/messages"
37 ) 40 )
38 41
39 type stringSlice []string 42 type stringSlice []string
(...skipping 29 matching lines...) Expand all
69 duration, cycle time.Duration 72 duration, cycle time.Duration
70 73
71 // gk is the gatekeeper config. 74 // gk is the gatekeeper config.
72 gks = []*messages.GatekeeperConfig{} 75 gks = []*messages.GatekeeperConfig{}
73 // gkt is the gatekeeper trees config. 76 // gkt is the gatekeeper trees config.
74 gkts = map[string][]messages.TreeMasterConfig{} 77 gkts = map[string][]messages.TreeMasterConfig{}
75 filteredFailures = uint64(0) 78 filteredFailures = uint64(0)
76 expvars = expvar.NewMap("dispatcher") 79 expvars = expvar.NewMap("dispatcher")
77 errLog = log.New(os.Stderr, "", log.Lshortfile|log.Ltime) 80 errLog = log.New(os.Stderr, "", log.Lshortfile|log.Ltime)
78 infoLog = log.New(os.Stdout, "", log.Lshortfile|log.Ltime) 81 infoLog = log.New(os.Stdout, "", log.Lshortfile|log.Ltime)
82
83 // tsmon metrics
84 iterations = metric.NewCounter("alerts-dispatcher/iterations",
Vadim Sh. 2016/07/28 23:08:37 nit: I think we use '_' as separators in /chrome/i
seanmccullough1 2016/07/29 00:47:55 Done.
85 "Number if iterations of the main polling loop per run.", field.String("status"))
Vadim Sh. 2016/07/28 23:08:37 'per run' is misleading, there'll be no (easy) way
seanmccullough1 2016/07/29 00:47:55 Done.
86 postErrors = metric.NewCounter("alerts-dispatcher/post-errors",
87 "Number of posting errors per run.")
88 alertCount = metric.NewInt("alerts-dispatcher/alert-count",
Vadim Sh. 2016/07/28 23:08:37 I think this should be counter too. Will be easier
seanmccullough1 2016/07/29 00:47:55 Exactly the latter. The number of *new* alerts sin
89 "Number of alerts generated in an iteration.", field.String("tree"))
79 ) 90 )
80 91
81 func init() { 92 func init() {
82 flag.Usage = func() { 93 flag.Usage = func() {
83 fmt.Printf("By default runs a single check, saves any alerts to ./alerts.json and exits.") 94 fmt.Printf("By default runs a single check, saves any alerts to ./alerts.json and exits.")
84 flag.PrintDefaults() 95 flag.PrintDefaults()
85 } 96 }
86 } 97 }
87 98
88 func analyzeBuildExtract(ctx context.Context, a *analyzer.Analyzer, tree string, masterURL *messages.MasterLocation, b *messages.BuildExtract) []messages.Alert { 99 func analyzeBuildExtract(ctx context.Context, a *analyzer.Analyzer, tree string, masterURL *messages.MasterLocation, b *messages.BuildExtract) []messages.Alert {
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 errLog.Printf("Couldn't get revision summaries: %v", err) 202 errLog.Printf("Couldn't get revision summaries: %v", err)
192 continue 203 continue
193 } 204 }
194 for _, rev := range revs { 205 for _, rev := range revs {
195 alerts.RevisionSummaries[rev.GitHash] = rev 206 alerts.RevisionSummaries[rev.GitHash] = rev
196 } 207 }
197 } 208 }
198 } 209 }
199 } 210 }
200 alerts.Timestamp = messages.TimeToEpochTime(time.Now()) 211 alerts.Timestamp = messages.TimeToEpochTime(time.Now())
212 alertCount.Set(ctx, int64(len(alerts.Alerts)), tree)
201 213
202 if *alertsBaseURL == "" { 214 if *alertsBaseURL == "" {
203 infoLog.Printf("No data_url provided. Writing to %s-alerts.json", tree) 215 infoLog.Printf("No data_url provided. Writing to %s-alerts.json", tree)
204 216
205 abytes, err := json.MarshalIndent(alerts, "", "\t") 217 abytes, err := json.MarshalIndent(alerts, "", "\t")
206 if err != nil { 218 if err != nil {
207 errLog.Printf("Couldn't marshal alerts json: %v", err) 219 errLog.Printf("Couldn't marshal alerts json: %v", err)
208 errs <- err 220 errs <- err
209 return 221 return
210 } 222 }
211 223
212 if err := ioutil.WriteFile(fmt.Sprintf("%s-alerts.json", tree), abytes, 0644); err != nil { 224 if err := ioutil.WriteFile(fmt.Sprintf("%s-alerts.json", tree), abytes, 0644); err != nil {
213 errLog.Printf("Couldn't write to alerts.json: %v", err) 225 errLog.Printf("Couldn't write to alerts.json: %v", err)
214 errs <- err 226 errs <- err
215 return 227 return
216 } 228 }
217 } else { 229 } else {
218 alertsURL := fmt.Sprintf("%s/%s", *alertsBaseURL, tree) 230 alertsURL := fmt.Sprintf("%s/%s", *alertsBaseURL, tree)
219 w := client.NewWriter(alertsURL, transport) 231 w := client.NewWriter(alertsURL, transport)
220 infoLog.Printf("Posting alerts to %s", alertsURL) 232 infoLog.Printf("Posting alerts to %s", alertsURL)
221 err := w.PostAlerts(alerts) 233 err := w.PostAlerts(alerts)
222 if err != nil { 234 if err != nil {
223 errLog.Printf("Couldn't post alerts: %v", err) 235 errLog.Printf("Couldn't post alerts: %v", err)
236 postErrors.Add(ctx, 1)
224 errs <- err 237 errs <- err
225 return 238 return
226 } 239 }
227 } 240 }
228 241
229 infoLog.Printf("Filtered failures: %v", filteredFailures) 242 infoLog.Printf("Filtered failures: %v", filteredFailures)
230 done <- nil 243 done <- nil
231 }() 244 }()
232 } 245 }
233 246
(...skipping 10 matching lines...) Expand all
244 257
245 func main() { 258 func main() {
246 flag.Var(&gatekeeperJSON, "gatekeeper", "Location of gatekeeper json file. Can have multiple comma separated values.") 259 flag.Var(&gatekeeperJSON, "gatekeeper", "Location of gatekeeper json file. Can have multiple comma separated values.")
247 flag.Var(&gatekeeperTreesJSON, "gatekeeper-trees", "Location of gatekeeper tree json file. Can have multiple comma separated values.") 260 flag.Var(&gatekeeperTreesJSON, "gatekeeper-trees", "Location of gatekeeper tree json file. Can have multiple comma separated values.")
248 261
249 flag.Parse() 262 flag.Parse()
250 263
251 ctx := context.Background() 264 ctx := context.Background()
252 ctx = gologger.StdConfig.Use(ctx) 265 ctx = gologger.StdConfig.Use(ctx)
253 logging.SetLevel(ctx, logging.Debug) 266 logging.SetLevel(ctx, logging.Debug)
254
255 authOptions := auth.Options{ 267 authOptions := auth.Options{
256 ServiceAccountJSONPath: *serviceAccountJSON, 268 ServiceAccountJSONPath: *serviceAccountJSON,
257 Scopes: []string{ 269 Scopes: []string{
258 auth.OAuthScopeEmail, 270 auth.OAuthScopeEmail,
259 "https://www.googleapis.com/auth/projecthosting", 271 "https://www.googleapis.com/auth/projecthosting",
260 }, 272 },
261 Method: auth.ServiceAccountMethod, 273 Method: auth.ServiceAccountMethod,
262 } 274 }
263 275
264 mode := auth.SilentLogin 276 mode := auth.SilentLogin
265 if *login { 277 if *login {
266 mode = auth.InteractiveLogin 278 mode = auth.InteractiveLogin
267 } 279 }
268 280
269 transport, err := auth.NewAuthenticator(ctx, mode, authOptions).Transport() 281 transport, err := auth.NewAuthenticator(ctx, mode, authOptions).Transport()
270 if err != nil { 282 if err != nil {
271 errLog.Printf("AuthenticatedTransport: %v", err) 283 errLog.Printf("AuthenticatedTransport: %v", err)
272 if !*login { 284 if !*login {
273 errLog.Printf("Consider re-running with -login") 285 errLog.Printf("Consider re-running with -login")
274 } 286 }
275 os.Exit(1) 287 os.Exit(1)
276 } 288 }
277 ctx = context.Background() 289 ctx = context.Background()
278 290
291 tsFlags := tsmon.NewFlags()
292 tsFlags.Target.TargetType = "task"
293 tsFlags.Target.TaskServiceName = "alerts-dispatcher"
294 tsFlags.Flush = "auto"
Vadim Sh. 2016/07/28 23:08:37 "manual"? Since tsmon.Flush is used manually below
seanmccullough1 2016/07/29 00:47:55 Done.
295 tsmon.InitializeFromFlags(ctx, &tsFlags)
296
279 // Start serving expvars. 297 // Start serving expvars.
280 go func() { 298 go func() {
281 listener, err := net.Listen("tcp", "127.0.0.1:0") 299 listener, err := net.Listen("tcp", "127.0.0.1:0")
282 if err != nil { 300 if err != nil {
283 errLog.Printf("Listen: %s", err) 301 errLog.Printf("Listen: %s", err)
284 os.Exit(1) 302 os.Exit(1)
285 } 303 }
286 304
287 infoLog.Printf("expvars listening on %v", listener.Addr()) 305 infoLog.Printf("expvars listening on %v", listener.Addr())
288 306
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 for tree := range trees { 415 for tree := range trees {
398 if _, ok := gkts[tree]; !ok { 416 if _, ok := gkts[tree]; !ok {
399 errLog.Printf("Unrecognized tree name: %s", tree) 417 errLog.Printf("Unrecognized tree name: %s", tree)
400 os.Exit(1) 418 os.Exit(1)
401 } 419 }
402 } 420 }
403 421
404 // This is the polling/analysis/alert posting function, which will run in a loop until 422 // This is the polling/analysis/alert posting function, which will run in a loop until
405 // a timeout or max errors is reached. 423 // a timeout or max errors is reached.
406 f := func(ctx context.Context) error { 424 f := func(ctx context.Context) error {
407 » » return mainLoop(ctx, a, trees, transport) 425 » » err := mainLoop(ctx, a, trees, transport)
426 » » if err == nil {
427 » » » iterations.Add(ctx, 1, "success")
428 » » } else {
429 » » » iterations.Add(ctx, 1, "failure")
430 » » }
431 » » tsmon.Flush(ctx)
Vadim Sh. 2016/07/28 23:08:37 how often would this be called? I think tsmon does
seanmccullough1 2016/07/29 00:47:55 Done.
432 » » return err
408 } 433 }
409 434
410 ctx, cancel := context.WithTimeout(ctx, duration) 435 ctx, cancel := context.WithTimeout(ctx, duration)
411 defer cancel() 436 defer cancel()
412 437
413 loopResults := looper.Run(ctx, f, cycle, *maxErrs, clock.GetSystemClock()) 438 loopResults := looper.Run(ctx, f, cycle, *maxErrs, clock.GetSystemClock())
414 439
440 tsmon.Shutdown(ctx)
441
415 if !loopResults.Success { 442 if !loopResults.Success {
416 errLog.Printf("Failed to run loop, %v errors", loopResults.Errs) 443 errLog.Printf("Failed to run loop, %v errors", loopResults.Errs)
417 os.Exit(1) 444 os.Exit(1)
418 } 445 }
419 } 446 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698