Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(300)

Side by Side Diff: milo/appengine/swarming/build.go

Issue 2717623002: Milo: Handle missing / transient LogDog failures. (Closed)
Patch Set: remote unnecessary code Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The LUCI Authors. All rights reserved. 1 // Copyright 2015 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0 2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file. 3 // that can be found in the LICENSE file.
4 4
5 package swarming 5 package swarming
6 6
7 import ( 7 import (
8 "bytes" 8 "bytes"
9 "fmt" 9 "fmt"
10 "net/http" 10 "net/http"
(...skipping 17 matching lines...) Expand all
28 "github.com/luci/luci-go/server/auth" 28 "github.com/luci/luci-go/server/auth"
29 ) 29 )
30 30
31 // errNotMiloJob is returned if a Swarming task is fetched that does not self- 31 // errNotMiloJob is returned if a Swarming task is fetched that does not self-
32 // identify as a Milo job. 32 // identify as a Milo job.
33 var errNotMiloJob = errors.New("Not a Milo Job") 33 var errNotMiloJob = errors.New("Not a Milo Job")
34 34
35 // SwarmingTimeLayout is time layout used by swarming. 35 // SwarmingTimeLayout is time layout used by swarming.
36 const SwarmingTimeLayout = "2006-01-02T15:04:05.999999999" 36 const SwarmingTimeLayout = "2006-01-02T15:04:05.999999999"
37 37
38 // logDogFetchTimeout is the amount of time to wait while fetching a LogDog
39 // stream before we time out the fetch.
40 const logDogFetchTimeout = 30 * time.Second
41
38 // Swarming task states.. 42 // Swarming task states..
39 const ( 43 const (
40 // TaskRunning means task is running. 44 // TaskRunning means task is running.
41 TaskRunning = "RUNNING" 45 TaskRunning = "RUNNING"
42 // TaskPending means task didn't start yet. 46 // TaskPending means task didn't start yet.
43 TaskPending = "PENDING" 47 TaskPending = "PENDING"
44 // TaskExpired means task expired and did not start. 48 // TaskExpired means task expired and did not start.
45 TaskExpired = "EXPIRED" 49 TaskExpired = "EXPIRED"
46 // TaskTimedOut means task started, but took too long. 50 // TaskTimedOut means task started, but took too long.
47 TaskTimedOut = "TIMED_OUT" 51 TaskTimedOut = "TIMED_OUT"
(...skipping 457 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService, linkBase, taskID string) (*resp.MiloBuild, error) { 509 func (bl *buildLoader) swarmingBuildImpl(c context.Context, svc swarmingService, linkBase, taskID string) (*resp.MiloBuild, error) {
506 // Fetch the data from Swarming 510 // Fetch the data from Swarming
507 var logDogStreamAddr *types.StreamAddr 511 var logDogStreamAddr *types.StreamAddr
508 512
509 fetchParams := swarmingFetchParams{ 513 fetchParams := swarmingFetchParams{
510 fetchRes: true, 514 fetchRes: true,
511 fetchLog: true, 515 fetchLog: true,
512 516
513 // Cancel if LogDog annotation stream parameters are present in the tag set. 517 // Cancel if LogDog annotation stream parameters are present in the tag set.
514 taskResCallback: func(res *swarming.SwarmingRpcsTaskResult) (cancelLogs bool) { 518 taskResCallback: func(res *swarming.SwarmingRpcsTaskResult) (cancelLogs bool) {
519 // If the build hasn't started yet, then there is no Log Dog log stream to
520 // render.
521 switch res.State {
522 case TaskPending, TaskExpired:
523 return false
524
525 case TaskCanceled:
526 // If the task wasn't created, then it wasn't started.
527 if res.CreatedTs == "" {
528 return false
529 }
530 }
531
532 // The task started ... is it using LogDog for logging?
515 tags := swarmingTags(res.Tags) 533 tags := swarmingTags(res.Tags)
516 534
517 var err error 535 var err error
518 if logDogStreamAddr, err = resolveLogDogStreamAddrFromTags(tags, res.TaskId, res.TryNumber); err != nil { 536 if logDogStreamAddr, err = resolveLogDogStreamAddrFromTags(tags, res.TaskId, res.TryNumber); err != nil {
519 logging.WithError(err).Debugf(c, "Not using LogDog annotation stream.") 537 logging.WithError(err).Debugf(c, "Not using LogDog annotation stream.")
520 return false 538 return false
521 } 539 }
522 return true 540 return true
523 }, 541 },
524 } 542 }
525 fr, err := swarmingFetch(c, svc, taskID, fetchParams) 543 fr, err := swarmingFetch(c, svc, taskID, fetchParams)
526 if err != nil { 544 if err != nil {
527 return nil, err 545 return nil, err
528 } 546 }
529 547
530 var build resp.MiloBuild 548 var build resp.MiloBuild
531 var s *miloProto.Step 549 var s *miloProto.Step
532 var lds *logdog.Streams 550 var lds *logdog.Streams
533 var ub logdog.URLBuilder 551 var ub logdog.URLBuilder
534 552
535 // Load the build from the available data. 553 // Load the build from the available data.
536 // 554 //
537 // If the Swarming task explicitly specifies its log location, we prefer that. 555 // If the Swarming task explicitly specifies its log location, we prefer that.
538 // As a fallback, we will try and parse the Swarming task's output for 556 // As a fallback, we will try and parse the Swarming task's output for
539 // annotations. 557 // annotations.
540 switch { 558 switch {
541 case logDogStreamAddr != nil: 559 case logDogStreamAddr != nil:
560 logging.Infof(c, "Loading build from LogDog stream at: %s", logDogStreamAddr)
561
542 // If the LogDog stream is available, load the step from that. 562 // If the LogDog stream is available, load the step from that.
543 as, err := bl.newEmptyAnnotationStream(c, logDogStreamAddr) 563 as, err := bl.newEmptyAnnotationStream(c, logDogStreamAddr)
544 if err != nil { 564 if err != nil {
545 return nil, errors.Annotate(err).Reason("failed to create LogDog annotation stream").Err() 565 return nil, errors.Annotate(err).Reason("failed to create LogDog annotation stream").Err()
546 } 566 }
547 567
548 if s, err = as.Fetch(c); err != nil {
549 return nil, errors.Annotate(err).Reason("failed to load LogDog annotation stream").Err()
550 }
551
552 prefix, _ := logDogStreamAddr.Path.Split() 568 prefix, _ := logDogStreamAddr.Path.Split()
553 ub = &logdog.ViewerURLBuilder{ 569 ub = &logdog.ViewerURLBuilder{
554 Host: logDogStreamAddr.Host, 570 Host: logDogStreamAddr.Host,
555 Prefix: prefix, 571 Prefix: prefix,
556 Project: logDogStreamAddr.Project, 572 Project: logDogStreamAddr.Project,
557 } 573 }
558 574
575 if s, err = as.Fetch(c); err != nil {
576 switch errors.Unwrap(err) {
577 case coordinator.ErrNoSuchStream:
578 logging.WithError(err).Errorf(c, "User cannot access stream.")
579 build.Components = append(build.Components, infoComponent(resp.Running,
580 "Waiting...", "waiting for annotation stream"))
581
582 case coordinator.ErrNoAccess:
583 logging.WithError(err).Errorf(c, "User cannot access stream.")
584 build.Components = append(build.Components, infoComponent(resp.Failure,
585 "No Access", "no access to annotation stream"))
586
587 default:
588 logging.WithError(err).Errorf(c, "Failed to load LogDog annotation stream.")
589 build.Components = append(build.Components, infoComponent(resp.InfraFailure,
590 "Error", "failed to load annotation stream"))
591 }
592 }
593
559 case fr.log != "": 594 case fr.log != "":
560 // Decode the data using annotee. The logdog stream returned here is assumed 595 // Decode the data using annotee. The logdog stream returned here is assumed
561 // to be consistent, which is why the following block of code are not 596 // to be consistent, which is why the following block of code are not
562 // expected to ever err out. 597 // expected to ever err out.
563 var err error 598 var err error
564 lds, err = streamsFromAnnotatedLog(c, fr.log) 599 lds, err = streamsFromAnnotatedLog(c, fr.log)
565 if err != nil { 600 if err != nil {
566 » » » build.Components = []*resp.BuildComponent{{ 601 » » » comp := infoComponent(resp.InfraFailure, "Milo annotation parser", err.Error())
567 » » » » Type: resp.Summary, 602 » » » comp.SubLink = append(comp.SubLink, &resp.Link{
568 » » » » Label: "Milo annotation parser", 603 » » » » Label: "swarming task",
569 » » » » Text: []string{err.Error()}, 604 » » » » URL: taskPageURL(svc.getHost(), taskID),
570 » » » » Status: resp.InfraFailure, 605 » » » })
571 » » » » SubLink: []*resp.Link{{ 606 » » » build.Components = append(build.Components, comp)
572 » » » » » Label: "swarming task",
573 » » » » » URL: taskPageURL(svc.getHost(), taskID),
574 » » » » }},
575 » » » }}
576 } 607 }
577 608
578 if lds != nil && lds.MainStream != nil && lds.MainStream.Data != nil { 609 if lds != nil && lds.MainStream != nil && lds.MainStream.Data != nil {
579 s = lds.MainStream.Data 610 s = lds.MainStream.Data
580 } 611 }
581 ub = swarmingURLBuilder(linkBase) 612 ub = swarmingURLBuilder(linkBase)
582 613
583 default: 614 default:
584 s = &miloProto.Step{} 615 s = &miloProto.Step{}
585 ub = swarmingURLBuilder(linkBase) 616 ub = swarmingURLBuilder(linkBase)
586 } 617 }
587 618
588 » if err := addTaskToMiloStep(c, svc.getHost(), fr.res, s); err != nil { 619 » if s != nil {
589 » » return nil, err 620 » » if err := addTaskToMiloStep(c, svc.getHost(), fr.res, s); err != nil {
621 » » » return nil, err
622 » » }
623 » » logdog.AddLogDogToBuild(c, ub, s, &build)
590 } 624 }
591 logdog.AddLogDogToBuild(c, ub, s, &build)
592 625
593 if err := addTaskToBuild(c, svc.getHost(), fr.res, &build); err != nil { 626 if err := addTaskToBuild(c, svc.getHost(), fr.res, &build); err != nil {
594 return nil, err 627 return nil, err
595 } 628 }
596 629
597 return &build, nil 630 return &build, nil
598 } 631 }
599 632
633 func infoComponent(st resp.Status, label, text string) *resp.BuildComponent {
634 return &resp.BuildComponent{
635 Type: resp.Summary,
636 Label: label,
637 Text: []string{text},
638 Status: st,
639 }
640 }
641
600 func isMiloJob(tags []string) bool { 642 func isMiloJob(tags []string) bool {
601 for _, t := range tags { 643 for _, t := range tags {
602 if t == "allow_milo:1" { 644 if t == "allow_milo:1" {
603 return true 645 return true
604 } 646 }
605 } 647 }
606 return false 648 return false
607 } 649 }
608 650
609 // taskPageURL returns a URL to a human-consumable page of a swarming task. 651 // taskPageURL returns a URL to a human-consumable page of a swarming task.
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
665 for _, tag := range v { 707 for _, tag := range v {
666 var value string 708 var value string
667 parts := strings.SplitN(tag, ":", 2) 709 parts := strings.SplitN(tag, ":", 2)
668 if len(parts) == 2 { 710 if len(parts) == 2 {
669 value = parts[1] 711 value = parts[1]
670 } 712 }
671 res[parts[0]] = value 713 res[parts[0]] = value
672 } 714 }
673 return res 715 return res
674 } 716 }
OLDNEW
« no previous file with comments | « milo/appengine/logdog/http.go ('k') | milo/appengine/swarming/expectations/build-pending-logdog.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698