Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Unified Diff: fuzzer/go/common/storage.go

Issue 1691893002: Fuzzer now deduplicates on the analysis side instead of the download side (Closed) Base URL: https://skia.googlesource.com/buildbot@metrics
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « fuzzer/go/backend/version_updater.go ('k') | fuzzer/go/config/config.go » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: fuzzer/go/common/storage.go
diff --git a/fuzzer/go/common/storage.go b/fuzzer/go/common/storage.go
index 010c32eae2deff61fc0d58dddf7cfea7b36443b4..ca1a7c76dcee433fef664c7bf98896c3db22a91f 100644
--- a/fuzzer/go/common/storage.go
+++ b/fuzzer/go/common/storage.go
@@ -2,20 +2,43 @@ package common
import (
"fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
"strings"
+ "sync"
+ "sync/atomic"
+ "github.com/skia-dev/glog"
"go.skia.org/infra/fuzzer/go/config"
+ "go.skia.org/infra/go/fileutil"
"go.skia.org/infra/go/gs"
"google.golang.org/cloud/storage"
)
+// ExtractFuzzNameFromPath returns the piece of path after its last "/", which is the fuzz name.
+// A path with no "/" in it is returned unchanged.
+func ExtractFuzzNameFromPath(path string) (name string) {
+ return path[strings.LastIndex(path, "/")+1:]
+}
+
+// ExtractFuzzNamesFromPaths calls ExtractFuzzNameFromPath on every element of paths and returns
+// the resulting fuzz names in the same order as the paths were given.
+func ExtractFuzzNamesFromPaths(paths []string) (names []string) {
+ names = make([]string, 0, len(paths))
+ for _, path := range paths {
+ names = append(names, ExtractFuzzNameFromPath(path))
+ }
+ return names
+}
+
// GetAllFuzzNamesInFolder returns all the fuzz names in a given GCS folder. It basically
// returns the last path piece of every file whose name has no . in it (which skips the .dump
// and .err files), or error if there was a problem.
func GetAllFuzzNamesInFolder(s *storage.Client, name string) (hashes []string, err error) {
filter := func(item *storage.ObjectAttrs) {
name := item.Name
- if strings.Contains(name, ".") {
+ if !IsNameOfFuzz(name) {
return
}
fuzzHash := name[strings.LastIndex(name, "/")+1:]
@@ -27,3 +50,64 @@ func GetAllFuzzNamesInFolder(s *storage.Client, name string) (hashes []string, e
}
return hashes, nil
}
+
+// IsNameOfFuzz reports whether the given GCS file name is a fuzz, which is the case exactly when
+// the name has no . anywhere in it (the whole string is checked, not just its last path piece).
+func IsNameOfFuzz(name string) bool {
+ return !strings.Contains(name, ".")
+}
+
+// DownloadAllFuzzes downloads all fuzzes of a given type "bad", "grey" at the specified revision
+// and returns a slice of all the paths on disk where they are. The fuzzes are listed from the GCS
+// folder category/revision/fuzzType and written into downloadPath by processes concurrent
+// workers; at least one worker is always started. A fuzz that fails to download is logged and
+// skipped by the worker, but its path is still part of the returned slice. If listing the
+// folder fails, the already queued downloads are allowed to finish and an error is returned.
+func DownloadAllFuzzes(s *storage.Client, downloadPath, category, revision, fuzzType string, processes int) ([]string, error) {
+	if processes < 1 {
+		// With no workers nothing drains toDownload, so the listing below would block forever
+		// once the channel buffer is full.
+		processes = 1
+	}
+	completedCount := int32(0)
+	var wg sync.WaitGroup
+	toDownload := make(chan string, 1000)
+	for i := 0; i < processes; i++ {
+		// Add has to happen before the goroutine is started. If each worker registered itself,
+		// wg.Wait() below could run before any worker had called Add and return too early.
+		wg.Add(1)
+		go download(s, toDownload, downloadPath, &wg, &completedCount)
+	}
+	fuzzPaths := []string{}
+
+	// enqueue is the callback handed to gs.AllFilesInDir. It records where the fuzz will be
+	// written on disk and passes the GCS name on to the workers.
+	enqueue := func(item *storage.ObjectAttrs) {
+		name := item.Name
+		if !IsNameOfFuzz(name) {
+			return
+		}
+		fuzzPaths = append(fuzzPaths, filepath.Join(downloadPath, ExtractFuzzNameFromPath(name)))
+		toDownload <- name
+	}
+	err := gs.AllFilesInDir(s, config.GS.Bucket, fmt.Sprintf("%s/%s/%s", category, revision, fuzzType), enqueue)
+	// Close the channel and wait even when listing failed, so the workers are never leaked.
+	close(toDownload)
+	wg.Wait()
+	if err != nil {
+		return nil, fmt.Errorf("Problem iterating through all files: %s", err)
+	}
+
+	return fuzzPaths, nil
+}
+
+// download is a worker that is meant to be run in its own go routine. It waits for files to
+// download from Google Storage and downloads them to downloadPath, skipping any file for which
+// fileutil.FileExists already reports a copy on disk. When the channel is closed, it signals to
+// the WaitGroup that it is done; the caller must have called wg.Add(1) before starting the go
+// routine. It also logs the progress on downloading the fuzzes.
+func download(storageClient *storage.Client, toDownload <-chan string, downloadPath string, wg *sync.WaitGroup, completedCounter *int32) {
+	defer wg.Done()
+	for file := range toDownload {
+		onDisk := filepath.Join(downloadPath, ExtractFuzzNameFromPath(file))
+		if !fileutil.FileExists(onDisk) {
+			contents, err := gs.FileContentsFromGS(storageClient, config.GS.Bucket, file)
+			if err != nil {
+				glog.Warningf("Problem downloading fuzz %s, continuing anyway: %s", file, err)
+				continue
+			}
+			if err = ioutil.WriteFile(onDisk, contents, 0644); err != nil && !os.IsExist(err) {
+				glog.Warningf("Problem writing fuzz to %s, continuing anyway: %s", onDisk, err)
+			}
+		}
+		// Use the value returned by AddInt32 instead of reading the shared counter again
+		// without synchronization, which would race with the other workers.
+		if done := atomic.AddInt32(completedCounter, 1); done%100 == 0 {
+			glog.Infof("%d fuzzes downloaded", done)
+		}
+	}
+}
« no previous file with comments | « fuzzer/go/backend/version_updater.go ('k') | fuzzer/go/config/config.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698