Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Unified Diff: fuzzer/go/frontend/gsloader/gsloader.go

Issue 1672033002: Deduplicate fuzz cases on the frontend before they are displayed. (Closed) Base URL: https://skia.googlesource.com/buildbot@remove-overview
Patch Set: merged upstream Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « fuzzer/go/frontend/data/testdata/parse-asan-single.asan ('k') | fuzzer/go/frontend/syncer/fuzz_syncer.go » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: fuzzer/go/frontend/gsloader/gsloader.go
diff --git a/fuzzer/go/frontend/gsloader/gsloader.go b/fuzzer/go/frontend/gsloader/gsloader.go
index ef8249bcd29776cffb74bafdb9c2b50718836a22..b8393ae62916ecdfc9ef3229c66661ab19b2a203 100644
--- a/fuzzer/go/frontend/gsloader/gsloader.go
+++ b/fuzzer/go/frontend/gsloader/gsloader.go
@@ -9,6 +9,7 @@ import (
"github.com/skia-dev/glog"
"go.skia.org/infra/fuzzer/go/common"
"go.skia.org/infra/fuzzer/go/config"
+ "go.skia.org/infra/fuzzer/go/deduplicator"
"go.skia.org/infra/fuzzer/go/frontend/data"
"go.skia.org/infra/fuzzer/go/fuzzcache"
"go.skia.org/infra/go/gs"
@@ -18,7 +19,8 @@ import (
// LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with the given hash.
// The FuzzReport is first put into the staging fuzz cache, and then into the current.
// If a cache for the commit does not exist, or there are other problems with the retrieval,
-// an error is returned.
+// an error is returned. We do not need to deduplicate on extraction because
+// the fuzzes were deduplicated on storage.
func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error {
glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVersion.Hash)
for _, category := range common.FUZZ_CATEGORIES {
@@ -37,6 +39,7 @@ func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error {
type GSLoader struct {
storageClient *storage.Client
Cache *fuzzcache.FuzzReportCache
+ deduplicator *deduplicator.Deduplicator
// completedCounter is the number of fuzzes that have been downloaded from GCS, used for logging.
completedCounter int32
@@ -47,6 +50,7 @@ func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader {
return &GSLoader{
storageClient: s,
Cache: c,
+ deduplicator: deduplicator.New(),
}
}
@@ -58,6 +62,7 @@ func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader {
func (g *GSLoader) LoadFreshFromGoogleStorage() error {
revision := config.FrontEnd.SkiaVersion.Hash
data.ClearStaging()
+ g.deduplicator.Clear()
fuzzNames := make([]string, 0, 100)
for _, cat := range common.FUZZ_CATEGORIES {
badPath := fmt.Sprintf("%s/%s/bad", cat, revision)
@@ -65,16 +70,23 @@ func (g *GSLoader) LoadFreshFromGoogleStorage() error {
if err != nil {
return err
}
- n := 0
+ b := 0
+ d := 0
for report := range reports {
- data.NewFuzzFound(cat, report)
+ // We always add the fuzzName, to avoid redownloading duplicates over and over again.
fuzzNames = append(fuzzNames, report.FuzzName)
- n++
+ if g.deduplicator.IsUnique(report) {
+ data.NewFuzzFound(cat, report)
+ b++
+ } else {
+ d++
+ }
+
}
- glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, config.GS.Bucket, badPath)
+ glog.Infof("%d bad fuzzes (%d duplicate) freshly loaded from gs://%s/%s", b, d, config.GS.Bucket, badPath)
+ data.StagingToCurrent()
}
- data.StagingToCurrent()
for _, category := range common.FUZZ_CATEGORIES {
if err := g.Cache.StoreTree(data.StagingCopy(category), category, revision); err != nil {
glog.Errorf("Problem storing category %s to boltDB: %s", category, err)
@@ -98,13 +110,19 @@ func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error {
if err != nil {
return err
}
- n := 0
+ b := 0
+ d := 0
for report := range reports {
- data.NewFuzzFound(cat, report)
+ // We always add the fuzzName, to avoid redownloading duplicates over and over again.
fuzzNames = append(fuzzNames, report.FuzzName)
- n++
+ if g.deduplicator.IsUnique(report) {
+ data.NewFuzzFound(cat, report)
+ b++
+ } else {
+ d++
+ }
}
- glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, config.GS.Bucket, badPath)
+ glog.Infof("%d bad fuzzes (%d duplicate) incrementally loaded from gs://%s/%s", b, d, config.GS.Bucket, badPath)
}
data.StagingToCurrent()
« no previous file with comments | « fuzzer/go/frontend/data/testdata/parse-asan-single.asan ('k') | fuzzer/go/frontend/syncer/fuzz_syncer.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698