Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: fuzzer/go/frontend/gsloader/gsloader.go

Issue 1672033002: Deduplicate fuzz cases on the frontend before they are displayed. (Closed) Base URL: https://skia.googlesource.com/buildbot@remove-overview
Patch Set: merged upstream Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 package gsloader 1 package gsloader
2 2
3 import ( 3 import (
4 "fmt" 4 "fmt"
5 "sort" 5 "sort"
6 "sync" 6 "sync"
7 "sync/atomic" 7 "sync/atomic"
8 8
9 "github.com/skia-dev/glog" 9 "github.com/skia-dev/glog"
10 "go.skia.org/infra/fuzzer/go/common" 10 "go.skia.org/infra/fuzzer/go/common"
11 "go.skia.org/infra/fuzzer/go/config" 11 "go.skia.org/infra/fuzzer/go/config"
12 "go.skia.org/infra/fuzzer/go/deduplicator"
12 "go.skia.org/infra/fuzzer/go/frontend/data" 13 "go.skia.org/infra/fuzzer/go/frontend/data"
13 "go.skia.org/infra/fuzzer/go/fuzzcache" 14 "go.skia.org/infra/fuzzer/go/fuzzcache"
14 "go.skia.org/infra/go/gs" 15 "go.skia.org/infra/go/gs"
15 "google.golang.org/cloud/storage" 16 "google.golang.org/cloud/storage"
16 ) 17 )
17 18
18 // LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with the given hash. 19 // LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with the given hash.
19 // The FuzzReport is first put into the staging fuzz cache, and then into the current. 20 // The FuzzReport is first put into the staging fuzz cache, and then into the current.
20 // If a cache for the commit does not exist, or there are other problems with the retrieval, 21 // If a cache for the commit does not exist, or there are other problems with the retrieval,
21 // an error is returned. 22 // an error is returned. We do not need to deduplicate on extraction because
23 // the fuzzes were deduplicated on storage.
22 func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error { 24 func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error {
23 glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVer sion.Hash) 25 glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVer sion.Hash)
24 for _, category := range common.FUZZ_CATEGORIES { 26 for _, category := range common.FUZZ_CATEGORIES {
25 if staging, err := cache.LoadTree(category, config.FrontEnd.Skia Version.Hash); err != nil { 27 if staging, err := cache.LoadTree(category, config.FrontEnd.Skia Version.Hash); err != nil {
26 return fmt.Errorf("Problem decoding existing from bolt d b: %s", err) 28 return fmt.Errorf("Problem decoding existing from bolt d b: %s", err)
27 } else { 29 } else {
28 data.SetStaging(category, *staging) 30 data.SetStaging(category, *staging)
29 glog.Infof("Successfully loaded %s fuzzes from bolt db c ache", category) 31 glog.Infof("Successfully loaded %s fuzzes from bolt db c ache", category)
30 } 32 }
31 } 33 }
32 data.StagingToCurrent() 34 data.StagingToCurrent()
33 return nil 35 return nil
34 } 36 }
35 37
36 // GSLoader is a struct that handles downloading fuzzes from Google Storage. 38 // GSLoader is a struct that handles downloading fuzzes from Google Storage.
37 type GSLoader struct { 39 type GSLoader struct {
38 storageClient *storage.Client 40 storageClient *storage.Client
39 Cache *fuzzcache.FuzzReportCache 41 Cache *fuzzcache.FuzzReportCache
42 deduplicator *deduplicator.Deduplicator
40 43
41 // completedCounter is the number of fuzzes that have been downloaded from GCS, used for logging. 44 // completedCounter is the number of fuzzes that have been downloaded from GCS, used for logging.
42 completedCounter int32 45 completedCounter int32
43 } 46 }
44 47
45 // New creates a GSLoader and returns it. 48 // New creates a GSLoader and returns it.
46 func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader { 49 func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader {
47 return &GSLoader{ 50 return &GSLoader{
48 storageClient: s, 51 storageClient: s,
49 Cache: c, 52 Cache: c,
53 deduplicator: deduplicator.New(),
50 } 54 }
51 } 55 }
52 56
53 // LoadFreshFromGoogleStorage pulls all fuzzes out of GCS and loads them into memory. 57 // LoadFreshFromGoogleStorage pulls all fuzzes out of GCS and loads them into memory.
54 // The "fresh" in the name refers to the fact that all other loaded fuzzes (if any) 58 // The "fresh" in the name refers to the fact that all other loaded fuzzes (if any)
55 // are written over, including in the cache. 59 // are written over, including in the cache.
56 // Upon completion, the full results are cached to a boltDB instance and moved from staging 60 // Upon completion, the full results are cached to a boltDB instance and moved from staging
57 // to the current copy. 61 // to the current copy.
58 func (g *GSLoader) LoadFreshFromGoogleStorage() error { 62 func (g *GSLoader) LoadFreshFromGoogleStorage() error {
59 revision := config.FrontEnd.SkiaVersion.Hash 63 revision := config.FrontEnd.SkiaVersion.Hash
60 data.ClearStaging() 64 data.ClearStaging()
65 g.deduplicator.Clear()
61 fuzzNames := make([]string, 0, 100) 66 fuzzNames := make([]string, 0, 100)
62 for _, cat := range common.FUZZ_CATEGORIES { 67 for _, cat := range common.FUZZ_CATEGORIES {
63 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) 68 badPath := fmt.Sprintf("%s/%s/bad", cat, revision)
64 reports, err := g.getBinaryReportsFromGS(badPath, cat, nil) 69 reports, err := g.getBinaryReportsFromGS(badPath, cat, nil)
65 if err != nil { 70 if err != nil {
66 return err 71 return err
67 } 72 }
68 » » n := 0 73 » » b := 0
74 » » d := 0
69 for report := range reports { 75 for report := range reports {
70 » » » data.NewFuzzFound(cat, report) 76 » » » // We always add the fuzzName, to avoid redownloading du plicates over and over again.
71 fuzzNames = append(fuzzNames, report.FuzzName) 77 fuzzNames = append(fuzzNames, report.FuzzName)
72 » » » n++ 78 » » » if g.deduplicator.IsUnique(report) {
79 » » » » data.NewFuzzFound(cat, report)
80 » » » » b++
81 » » » } else {
82 » » » » d++
83 » » » }
84
73 } 85 }
74 » » glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, co nfig.GS.Bucket, badPath) 86 » » glog.Infof("%d bad fuzzes (%d duplicate) freshly loaded from gs: //%s/%s", b, d, config.GS.Bucket, badPath)
87 » » data.StagingToCurrent()
75 } 88 }
76 89
77 data.StagingToCurrent()
78 for _, category := range common.FUZZ_CATEGORIES { 90 for _, category := range common.FUZZ_CATEGORIES {
79 if err := g.Cache.StoreTree(data.StagingCopy(category), category , revision); err != nil { 91 if err := g.Cache.StoreTree(data.StagingCopy(category), category , revision); err != nil {
80 glog.Errorf("Problem storing category %s to boltDB: %s", category, err) 92 glog.Errorf("Problem storing category %s to boltDB: %s", category, err)
81 } 93 }
82 } 94 }
83 return g.Cache.StoreFuzzNames(fuzzNames, revision) 95 return g.Cache.StoreFuzzNames(fuzzNames, revision)
84 } 96 }
85 97
86 // LoadBinaryFuzzesFromGoogleStorage pulls all fuzzes out of GCS that are on the given whitelist 98 // LoadBinaryFuzzesFromGoogleStorage pulls all fuzzes out of GCS that are on the given whitelist
87 // and loads them into memory (as staging). After loading them, it updates the cache 99 // and loads them into memory (as staging). After loading them, it updates the cache
88 // and moves them from staging to the current copy. 100 // and moves them from staging to the current copy.
89 func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error { 101 func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error {
90 revision := config.FrontEnd.SkiaVersion.Hash 102 revision := config.FrontEnd.SkiaVersion.Hash
91 data.StagingFromCurrent() 103 data.StagingFromCurrent()
92 sort.Strings(whitelist) 104 sort.Strings(whitelist)
93 105
94 fuzzNames := make([]string, 0, 100) 106 fuzzNames := make([]string, 0, 100)
95 for _, cat := range common.FUZZ_CATEGORIES { 107 for _, cat := range common.FUZZ_CATEGORIES {
96 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) 108 badPath := fmt.Sprintf("%s/%s/bad", cat, revision)
97 reports, err := g.getBinaryReportsFromGS(badPath, cat, whitelist ) 109 reports, err := g.getBinaryReportsFromGS(badPath, cat, whitelist )
98 if err != nil { 110 if err != nil {
99 return err 111 return err
100 } 112 }
101 » » n := 0 113 » » b := 0
114 » » d := 0
102 for report := range reports { 115 for report := range reports {
103 » » » data.NewFuzzFound(cat, report) 116 » » » // We always add the fuzzName, to avoid redownloading du plicates over and over again.
104 fuzzNames = append(fuzzNames, report.FuzzName) 117 fuzzNames = append(fuzzNames, report.FuzzName)
105 » » » n++ 118 » » » if g.deduplicator.IsUnique(report) {
119 » » » » data.NewFuzzFound(cat, report)
120 » » » » b++
121 » » » } else {
122 » » » » d++
123 » » » }
106 } 124 }
107 » » glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, co nfig.GS.Bucket, badPath) 125 » » glog.Infof("%d bad fuzzes (%d duplicate) incrementally loaded fr om gs://%s/%s", b, d, config.GS.Bucket, badPath)
108 } 126 }
109 data.StagingToCurrent() 127 data.StagingToCurrent()
110 128
111 oldBinaryFuzzNames, err := g.Cache.LoadFuzzNames(revision) 129 oldBinaryFuzzNames, err := g.Cache.LoadFuzzNames(revision)
112 if err != nil { 130 if err != nil {
113 glog.Warningf("Could not read old binary fuzz names from cache. Continuing...", err) 131 glog.Warningf("Could not read old binary fuzz names from cache. Continuing...", err)
114 oldBinaryFuzzNames = []string{} 132 oldBinaryFuzzNames = []string{}
115 } 133 }
116 for _, category := range common.FUZZ_CATEGORIES { 134 for _, category := range common.FUZZ_CATEGORIES {
117 if err := g.Cache.StoreTree(data.StagingCopy(category), category , revision); err != nil { 135 if err := g.Cache.StoreTree(data.StagingCopy(category), category , revision); err != nil {
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
234 }, 252 },
235 } 253 }
236 254
237 reports <- data.ParseReport(p) 255 reports <- data.ParseReport(p)
238 atomic.AddInt32(&g.completedCounter, 1) 256 atomic.AddInt32(&g.completedCounter, 1)
239 if g.completedCounter%100 == 0 { 257 if g.completedCounter%100 == 0 {
240 glog.Infof("%d fuzzes downloaded", g.completedCounter) 258 glog.Infof("%d fuzzes downloaded", g.completedCounter)
241 } 259 }
242 } 260 }
243 } 261 }
OLDNEW
« no previous file with comments | « fuzzer/go/frontend/data/testdata/parse-asan-single.asan ('k') | fuzzer/go/frontend/syncer/fuzz_syncer.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698