Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Side by Side Diff: fuzzer/go/common/storage.go

Issue 1691893002: Fuzzer now deduplicates on the analysis side instead of the download side (Closed) Base URL: https://skia.googlesource.com/buildbot@metrics
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « fuzzer/go/backend/version_updater.go ('k') | fuzzer/go/config/config.go » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 package common 1 package common
2 2
3 import ( 3 import (
4 "fmt" 4 "fmt"
5 "io/ioutil"
6 "os"
7 "path/filepath"
5 "strings" 8 "strings"
9 "sync"
10 "sync/atomic"
6 11
12 "github.com/skia-dev/glog"
7 "go.skia.org/infra/fuzzer/go/config" 13 "go.skia.org/infra/fuzzer/go/config"
14 "go.skia.org/infra/go/fileutil"
8 "go.skia.org/infra/go/gs" 15 "go.skia.org/infra/go/gs"
9 "google.golang.org/cloud/storage" 16 "google.golang.org/cloud/storage"
10 ) 17 )
11 18
19 // ExtractFuzzNameFromPath turns a path name into a fuzz name by stripping off all but the
20 // last piece from the path.
21 func ExtractFuzzNameFromPath(path string) (name string) {
22 return path[strings.LastIndex(path, "/")+1:]
23 }
24
25 // ExtractFuzzNamesFromPaths turns all path names into just fuzz names, by extracting the
26 // last piece of the path.
27 func ExtractFuzzNamesFromPaths(paths []string) (names []string) {
28 names = make([]string, 0, len(paths))
29 for _, path := range paths {
30 names = append(names, ExtractFuzzNameFromPath(path))
31 }
32 return names
33 }
34
12 // GetAllFuzzNamesInFolder returns all the fuzz names in a given GCS folder. It basically 35 // GetAllFuzzNamesInFolder returns all the fuzz names in a given GCS folder. It basically
13 // returns a list of all files that don't end with a .dump or .err, or error 36 // returns a list of all files that don't end with a .dump or .err, or error
14 // if there was a problem. 37 // if there was a problem.
15 func GetAllFuzzNamesInFolder(s *storage.Client, name string) (hashes []string, err error) { 38 func GetAllFuzzNamesInFolder(s *storage.Client, name string) (hashes []string, err error) {
16 filter := func(item *storage.ObjectAttrs) { 39 filter := func(item *storage.ObjectAttrs) {
17 name := item.Name 40 name := item.Name
18 » » if strings.Contains(name, ".") { 41 » » if !IsNameOfFuzz(name) {
19 return 42 return
20 } 43 }
21 fuzzHash := name[strings.LastIndex(name, "/")+1:] 44 fuzzHash := name[strings.LastIndex(name, "/")+1:]
22 hashes = append(hashes, fuzzHash) 45 hashes = append(hashes, fuzzHash)
23 } 46 }
24 47
25 if err = gs.AllFilesInDir(s, config.GS.Bucket, name, filter); err != nil { 48 if err = gs.AllFilesInDir(s, config.GS.Bucket, name, filter); err != nil {
26 return hashes, fmt.Errorf("Problem getting fuzzes from folder %s : %s", name, err) 49 return hashes, fmt.Errorf("Problem getting fuzzes from folder %s : %s", name, err)
27 } 50 }
28 return hashes, nil 51 return hashes, nil
29 } 52 }
53
54 // IsNameOfFuzz returns true if the GCS file name given is a fuzz, which is basically if it doesn't
55 // have a . in it.
56 func IsNameOfFuzz(name string) bool {
57 return !strings.Contains(name, ".")
58 }
59
60 // DownloadAllFuzzes downloads all fuzzes of a given type "bad", "grey" at the specified revision
61 // and returns a slice of all the paths on disk where they are.
62 func DownloadAllFuzzes(s *storage.Client, downloadPath, category, revision, fuzzType string, processes int) ([]string, error) {
63 completedCount := int32(0)
64 var wg sync.WaitGroup
65 toDownload := make(chan string, 1000)
66 for i := 0; i < processes; i++ {
67 go download(s, toDownload, downloadPath, &wg, &completedCount)
68 }
69 fuzzPaths := []string{}
70
71 download := func(item *storage.ObjectAttrs) {
72 name := item.Name
73 if !IsNameOfFuzz(name) {
74 return
75 }
76 fuzzHash := name[strings.LastIndex(name, "/")+1:]
77 fuzzPaths = append(fuzzPaths, filepath.Join(downloadPath, fuzzHash))
78 toDownload <- item.Name
79 }
80 if err := gs.AllFilesInDir(s, config.GS.Bucket, fmt.Sprintf("%s/%s/%s", category, revision, fuzzType), download); err != nil {
81 return nil, fmt.Errorf("Problem iterating through all files: %s", err)
82 }
83 close(toDownload)
84 wg.Wait()
85
86 return fuzzPaths, nil
87 }
88
89 // download starts a go routine that waits for files to download from Google Storage and downloads
90 // them to downloadPath. When it is done (on error or when the channel is closed), it signals to
91 // the WaitGroup that it is done. It also logs the progress on downloading the fuzzes.
92 func download(storageClient *storage.Client, toDownload <-chan string, downloadPath string, wg *sync.WaitGroup, completedCounter *int32) {
93 wg.Add(1)
94 defer wg.Done()
95 for file := range toDownload {
96 hash := file[strings.LastIndex(file, "/")+1:]
97 onDisk := filepath.Join(downloadPath, hash)
98 if !fileutil.FileExists(onDisk) {
99 contents, err := gs.FileContentsFromGS(storageClient, config.GS.Bucket, file)
100 if err != nil {
101 glog.Warningf("Problem downloading fuzz %s, continuing anyway: %s", file, err)
102 continue
103 }
104 if err = ioutil.WriteFile(onDisk, contents, 0644); err != nil && !os.IsExist(err) {
105 glog.Warningf("Problem writing fuzz to %s, continuing anyway: %s", onDisk, err)
106 }
107 }
108 atomic.AddInt32(completedCounter, 1)
109 if *completedCounter%100 == 0 {
110 glog.Infof("%d fuzzes downloaded", *completedCounter)
111 }
112 }
113 }
OLDNEW
« no previous file with comments | « fuzzer/go/backend/version_updater.go ('k') | fuzzer/go/config/config.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698