Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(318)

Unified Diff: milo/appengine/swarming/build.go

Issue 2717623002: Milo: Handle missing / transient LogDog failures. (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« milo/appengine/logdog/build.go ('K') | « milo/appengine/logdog/build.go ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: milo/appengine/swarming/build.go
diff --git a/milo/appengine/swarming/build.go b/milo/appengine/swarming/build.go
index f02c0273e25d89270b576aea8fee87610a891cb0..00ab7d87c782d0aee1363f6d089cf0a2811dccbe 100644
--- a/milo/appengine/swarming/build.go
+++ b/milo/appengine/swarming/build.go
@@ -15,6 +15,7 @@ import (
"golang.org/x/net/context"
swarming "github.com/luci/luci-go/common/api/swarming/swarming/v1"
+ "github.com/luci/luci-go/common/clock"
"github.com/luci/luci-go/common/errors"
"github.com/luci/luci-go/common/logging"
"github.com/luci/luci-go/common/proto/google"
@@ -35,6 +36,10 @@ var errNotMiloJob = errors.New("Not a Milo Job")
// SwarmingTimeLayout is time layout used by swarming.
const SwarmingTimeLayout = "2006-01-02T15:04:05.999999999"
+// logDogFetchTimeout is the amount of time to wait while fetching a LogDog
+// stream before we time out the fetch.
+const logDogFetchTimeout = 30 * time.Second
+
// Swarming task states..
const (
// TaskRunning means task is running.
@@ -512,6 +517,20 @@ func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService,
// Cancel if LogDog annotation stream parameters are present in the tag set.
taskResCallback: func(res *swarming.SwarmingRpcsTaskResult) (cancelLogs bool) {
+ // If the build hasn't started yet, then there is no LogDog log stream to
+ // render.
+ switch res.State {
+ case TaskPending, TaskExpired:
+ return false
+
+ case TaskCanceled:
+ // If the task wasn't created, then it wasn't started.
+ if res.CreatedTs == "" {
+ return false
+ }
+ }
+
+ // The task started ... is it using LogDog for logging?
tags := swarmingTags(res.Tags)
var err error
@@ -545,10 +564,6 @@ func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService,
return nil, errors.Annotate(err).Reason("failed to create LogDog annotation stream").Err()
}
- if s, err = as.Fetch(c); err != nil {
- return nil, errors.Annotate(err).Reason("failed to load LogDog annotation stream").Err()
- }
-
prefix, _ := logDogStreamAddr.Path.Split()
ub = &logdog.ViewerURLBuilder{
Host: logDogStreamAddr.Host,
@@ -556,6 +571,18 @@ func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService,
Project: logDogStreamAddr.Project,
}
+ fetchCtx, cancelFunc := clock.WithTimeout(c, logDogFetchTimeout)
+ defer cancelFunc()
+
+ if s, err = as.Fetch(fetchCtx); err != nil {
+ logging.Fields{
+ logging.ErrorKey: err,
+ "addr": logDogStreamAddr,
+ }.Errorf(c, "Failed to load LogDog annotation stream.")
+ build.Components = append(build.Components, infraFailureComponent("LogDog load error", err))
+ break
+ }
+
case fr.log != "":
// Decode the data using annotee. The logdog stream returned here is assumed
// to be consistent, which is why the following block of code are not
@@ -563,16 +590,12 @@ func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService,
var err error
lds, err = streamsFromAnnotatedLog(c, fr.log)
if err != nil {
- build.Components = []*resp.BuildComponent{{
- Type: resp.Summary,
- Label: "Milo annotation parser",
- Text: []string{err.Error()},
- Status: resp.InfraFailure,
- SubLink: []*resp.Link{{
- Label: "swarming task",
- URL: taskPageURL(svc.getHost(), taskID),
- }},
- }}
+ comp := infraFailureComponent("Milo annotation parser", err)
+ comp.SubLink = append(comp.SubLink, &resp.Link{
+ Label: "swarming task",
+ URL: taskPageURL(svc.getHost(), taskID),
+ })
+ build.Components = append(build.Components, comp)
}
if lds != nil && lds.MainStream != nil && lds.MainStream.Data != nil {
@@ -597,6 +620,15 @@ func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService,
return &build, nil
}
+func infraFailureComponent(label string, err error) *resp.BuildComponent {
+ return &resp.BuildComponent{
+ Type: resp.Summary,
+ Label: label,
+ Text: []string{err.Error()},
+ Status: resp.InfraFailure,
+ }
+}
+
func isMiloJob(tags []string) bool {
for _, t := range tags {
if t == "allow_milo:1" {
« milo/appengine/logdog/build.go ('K') | « milo/appengine/logdog/build.go ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698