| OLD | NEW |
| 1 package alerting | 1 package alerting |
| 2 | 2 |
| 3 import ( | 3 import ( |
| 4 "database/sql" | 4 "database/sql" |
| 5 "encoding/json" | 5 "encoding/json" |
| 6 "fmt" | 6 "fmt" |
| 7 "math" | 7 "math" |
| 8 "time" | 8 "time" |
| 9 | 9 |
| 10 metrics "github.com/rcrowley/go-metrics" | 10 metrics "github.com/rcrowley/go-metrics" |
| 11 "github.com/skia-dev/glog" | 11 "github.com/skia-dev/glog" |
| 12 | 12 |
| 13 "go.skia.org/infra/go/issues" | 13 "go.skia.org/infra/go/issues" |
| 14 "go.skia.org/infra/go/metadata" | 14 "go.skia.org/infra/go/metadata" |
| 15 » "go.skia.org/infra/go/tiling" | 15 » tracedb "go.skia.org/infra/go/trace/db" |
| 16 "go.skia.org/infra/go/util" | 16 "go.skia.org/infra/go/util" |
| 17 "go.skia.org/infra/perf/go/clustering" | 17 "go.skia.org/infra/perf/go/clustering" |
| 18 "go.skia.org/infra/perf/go/config" | 18 "go.skia.org/infra/perf/go/config" |
| 19 "go.skia.org/infra/perf/go/db" | 19 "go.skia.org/infra/perf/go/db" |
| 20 "go.skia.org/infra/perf/go/types" | 20 "go.skia.org/infra/perf/go/types" |
| 21 ) | 21 ) |
| 22 | 22 |
| 23 const ( | 23 const ( |
| 24 CLUSTER_SIZE = 50 | 24 CLUSTER_SIZE = 50 |
| 25 CLUSTER_STDDEV = 0.001 | 25 CLUSTER_STDDEV = 0.001 |
| 26 | 26 |
| 27 // TRACKED_ITEM_URL_TEMPLATE is used to generate the URL that is | 27 // TRACKED_ITEM_URL_TEMPLATE is used to generate the URL that is |
| 28 // embedded in an issue. It is also used to search for issues linked to
a | 28 // embedded in an issue. It is also used to search for issues linked to
a |
| 29 // specific item (cluster). The format verb is to be replaced with the I
D | 29 // specific item (cluster). The format verb is to be replaced with the I
D |
| 30 // of the tracked item. | 30 // of the tracked item. |
| 31 TRACKED_ITEM_URL_TEMPLATE = "https://perf.skia.org/cl/%d" | 31 TRACKED_ITEM_URL_TEMPLATE = "https://perf.skia.org/cl/%d" |
| 32 ) | 32 ) |
| 33 | 33 |
| 34 var ( | 34 var ( |
| 35 // The number of clusters with a status of "New". | 35 // The number of clusters with a status of "New". |
| 36 newClustersGauge = metrics.NewRegisteredGauge("alerting.new", metrics.De
faultRegistry) | 36 newClustersGauge = metrics.NewRegisteredGauge("alerting.new", metrics.De
faultRegistry) |
| 37 | 37 |
| 38 // The number of times we've successfully done alert clustering. | 38 // The number of times we've successfully done alert clustering. |
| 39 runsCounter = metrics.NewRegisteredCounter("alerting.runs", metrics.Defa
ultRegistry) | 39 runsCounter = metrics.NewRegisteredCounter("alerting.runs", metrics.Defa
ultRegistry) |
| 40 | 40 |
| 41 // How long it takes to do a clustering run. | 41 // How long it takes to do a clustering run. |
| 42 alertingLatency = metrics.NewRegisteredTimer("alerting.latency", metrics
.DefaultRegistry) | 42 alertingLatency = metrics.NewRegisteredTimer("alerting.latency", metrics
.DefaultRegistry) |
| 43 | 43 |
| 44 » // tileStore is the TileStore we are alerting on. | 44 » // tileBuilder is the tracedb.Builder where we load Tiles from. |
| 45 » tileStore tiling.TileStore | 45 » tileBuilder *tracedb.Builder |
| 46 ) | 46 ) |
| 47 | 47 |
| 48 // CombineClusters combines freshly found clusters with existing clusters. | 48 // CombineClusters combines freshly found clusters with existing clusters. |
| 49 // | 49 // |
| 50 // Algorithm: | 50 // Algorithm: |
| 51 // Run clustering and pick out the "Interesting" clusters. | 51 // Run clustering and pick out the "Interesting" clusters. |
| 52 // Compare all the Interesting clusters to all the existing relevant clusters
, | 52 // Compare all the Interesting clusters to all the existing relevant clusters
, |
| 53 // where "relevant" clusters are ones whose Hash/timestamp of the step | 53 // where "relevant" clusters are ones whose Hash/timestamp of the step |
| 54 // exists in the current tile. | 54 // exists in the current tile. |
| 55 // Start with an empty "list". | 55 // Start with an empty "list". |
| (...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 265 if !util.Int64Equal(bugs, c.Bugs) { | 265 if !util.Int64Equal(bugs, c.Bugs) { |
| 266 c.Bugs = bugs | 266 c.Bugs = bugs |
| 267 if err := Write(c); err != nil { | 267 if err := Write(c); err != nil { |
| 268 return fmt.Errorf("Alerting: Failed to write updated clu
ster with bugs: %s", err) | 268 return fmt.Errorf("Alerting: Failed to write updated clu
ster with bugs: %s", err) |
| 269 } | 269 } |
| 270 } | 270 } |
| 271 return nil | 271 return nil |
| 272 } | 272 } |
| 273 | 273 |
| 274 // singleStep does a single round of alerting. | 274 // singleStep does a single round of alerting. |
| 275 func singleStep(tileStore tiling.TileStore, issueTracker issues.IssueTracker) { | 275 func singleStep(issueTracker issues.IssueTracker) { |
| 276 latencyBegin := time.Now() | 276 latencyBegin := time.Now() |
| 277 » tile, err := tileStore.Get(0, -1) | 277 » tile := tileBuilder.GetTile() |
| 278 » if err != nil { | |
| 279 » » glog.Errorf("Alerting: Failed to get tile: %s", err) | |
| 280 » » return | |
| 281 » } | |
| 282 | |
| 283 summary, err := clustering.CalculateClusterSummaries(tile, CLUSTER_SIZE,
CLUSTER_STDDEV, skpOnly) | 278 summary, err := clustering.CalculateClusterSummaries(tile, CLUSTER_SIZE,
CLUSTER_STDDEV, skpOnly) |
| 284 if err != nil { | 279 if err != nil { |
| 285 glog.Errorf("Alerting: Failed to calculate clusters: %s", err) | 280 glog.Errorf("Alerting: Failed to calculate clusters: %s", err) |
| 286 return | 281 return |
| 287 } | 282 } |
| 288 fresh := []*types.ClusterSummary{} | 283 fresh := []*types.ClusterSummary{} |
| 289 for _, c := range summary.Clusters { | 284 for _, c := range summary.Clusters { |
| 290 if math.Abs(c.StepFit.Regression) > clustering.INTERESTING_THRES
HHOLD { | 285 if math.Abs(c.StepFit.Regression) > clustering.INTERESTING_THRES
HHOLD { |
| 291 fresh = append(fresh, c) | 286 fresh = append(fresh, c) |
| 292 } | 287 } |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 331 } | 326 } |
| 332 } | 327 } |
| 333 newClustersGauge.Update(int64(count)) | 328 newClustersGauge.Update(int64(count)) |
| 334 runsCounter.Inc(1) | 329 runsCounter.Inc(1) |
| 335 alertingLatency.UpdateSince(latencyBegin) | 330 alertingLatency.UpdateSince(latencyBegin) |
| 336 } | 331 } |
| 337 | 332 |
| 338 // calcNewClusters counts how many clusters are "New" and updates | 333 // calcNewClusters counts how many clusters are "New" and updates |
| 339 // the newClustersGauge metric accordingly. | 334 // the newClustersGauge metric accordingly. |
| 340 func calcNewClusters() { | 335 func calcNewClusters() { |
| 341 » tile, err := tileStore.Get(0, -1) | 336 » tile := tileBuilder.GetTile() |
| 342 » if err != nil { | |
| 343 » » glog.Errorf("Alerting: Failed to get tile: %s", err) | |
| 344 » » return | |
| 345 » } | |
| 346 current, err := ListFrom(tile.Commits[0].CommitTime) | 337 current, err := ListFrom(tile.Commits[0].CommitTime) |
| 347 if err != nil { | 338 if err != nil { |
| 348 glog.Errorf("Alerting: Failed to get existing clusters: %s", err
) | 339 glog.Errorf("Alerting: Failed to get existing clusters: %s", err
) |
| 349 return | 340 return |
| 350 } | 341 } |
| 351 count := 0 | 342 count := 0 |
| 352 for _, c := range current { | 343 for _, c := range current { |
| 353 if c.Status == "New" { | 344 if c.Status == "New" { |
| 354 count++ | 345 count++ |
| 355 } | 346 } |
| 356 } | 347 } |
| 357 glog.Infof("Updated new cluster count: %d", count) | 348 glog.Infof("Updated new cluster count: %d", count) |
| 358 newClustersGauge.Update(int64(count)) | 349 newClustersGauge.Update(int64(count)) |
| 359 } | 350 } |
| 360 | 351 |
| 361 // Start kicks off a goroutine that periodically refreshes the current alerting
clusters. | 352 // Start kicks off a goroutine that periodically refreshes the current alerting
clusters. |
| 362 func Start(ts tiling.TileStore, apiKeyFlag string) { | 353 func Start(tb *tracedb.Builder, apiKeyFlag string) { |
| 354 » tileBuilder = tb |
| 363 apiKey := apiKeyFromFlag(apiKeyFlag) | 355 apiKey := apiKeyFromFlag(apiKeyFlag) |
| 364 var issueTracker issues.IssueTracker = nil | 356 var issueTracker issues.IssueTracker = nil |
| 365 if apiKey != "" { | 357 if apiKey != "" { |
| 366 issueTracker = issues.NewIssueTracker(apiKey) | 358 issueTracker = issues.NewIssueTracker(apiKey) |
| 367 } | 359 } |
| 368 | 360 |
| 369 tileStore = ts | |
| 370 go func() { | 361 go func() { |
| 371 for _ = range time.Tick(config.RECLUSTER_DURATION) { | 362 for _ = range time.Tick(config.RECLUSTER_DURATION) { |
| 372 » » » singleStep(ts, issueTracker) | 363 » » » singleStep(issueTracker) |
| 373 } | 364 } |
| 374 }() | 365 }() |
| 375 } | 366 } |
| OLD | NEW |