| OLD | NEW |
| 1 package gsloader | 1 package gsloader |
| 2 | 2 |
| 3 import ( | 3 import ( |
| 4 "fmt" | 4 "fmt" |
| 5 "sort" | 5 "sort" |
| 6 "sync" | 6 "sync" |
| 7 "sync/atomic" | 7 "sync/atomic" |
| 8 | 8 |
| 9 "github.com/skia-dev/glog" | 9 "github.com/skia-dev/glog" |
| 10 "go.skia.org/infra/fuzzer/go/common" | 10 "go.skia.org/infra/fuzzer/go/common" |
| 11 "go.skia.org/infra/fuzzer/go/config" | 11 "go.skia.org/infra/fuzzer/go/config" |
| 12 "go.skia.org/infra/fuzzer/go/deduplicator" |
| 12 "go.skia.org/infra/fuzzer/go/frontend/data" | 13 "go.skia.org/infra/fuzzer/go/frontend/data" |
| 13 "go.skia.org/infra/fuzzer/go/fuzzcache" | 14 "go.skia.org/infra/fuzzer/go/fuzzcache" |
| 14 "go.skia.org/infra/go/gs" | 15 "go.skia.org/infra/go/gs" |
| 15 "google.golang.org/cloud/storage" | 16 "google.golang.org/cloud/storage" |
| 16 ) | 17 ) |
| 17 | 18 |
| 18 // LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with
the given hash. | 19 // LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with
the given hash. |
| 19 // The FuzzReport is first put into the staging fuzz cache, and then into the cu
rrent. | 20 // The FuzzReport is first put into the staging fuzz cache, and then into the cu
rrent. |
| 20 // If a cache for the commit does not exist, or there are other problems with th
e retrieval, | 21 // If a cache for the commit does not exist, or there are other problems with th
e retrieval, |
| 21 // an error is returned. | 22 // an error is returned. We do not need to deduplicate on extraction because |
| 23 // the fuzzes were deduplicated on storage. |
| 22 func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error { | 24 func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error { |
| 23 glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVer
sion.Hash) | 25 glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVer
sion.Hash) |
| 24 for _, category := range common.FUZZ_CATEGORIES { | 26 for _, category := range common.FUZZ_CATEGORIES { |
| 25 if staging, err := cache.LoadTree(category, config.FrontEnd.Skia
Version.Hash); err != nil { | 27 if staging, err := cache.LoadTree(category, config.FrontEnd.Skia
Version.Hash); err != nil { |
| 26 return fmt.Errorf("Problem decoding existing from bolt d
b: %s", err) | 28 return fmt.Errorf("Problem decoding existing from bolt d
b: %s", err) |
| 27 } else { | 29 } else { |
| 28 data.SetStaging(category, *staging) | 30 data.SetStaging(category, *staging) |
| 29 glog.Infof("Successfully loaded %s fuzzes from bolt db c
ache", category) | 31 glog.Infof("Successfully loaded %s fuzzes from bolt db c
ache", category) |
| 30 } | 32 } |
| 31 } | 33 } |
| 32 data.StagingToCurrent() | 34 data.StagingToCurrent() |
| 33 return nil | 35 return nil |
| 34 } | 36 } |
| 35 | 37 |
| 36 // GSLoader is a struct that handles downloading fuzzes from Google Storage. | 38 // GSLoader is a struct that handles downloading fuzzes from Google Storage. |
| 37 type GSLoader struct { | 39 type GSLoader struct { |
| 38 storageClient *storage.Client | 40 storageClient *storage.Client |
| 39 Cache *fuzzcache.FuzzReportCache | 41 Cache *fuzzcache.FuzzReportCache |
| 42 deduplicator *deduplicator.Deduplicator |
| 40 | 43 |
| 41 // completedCounter is the number of fuzzes that have been downloaded fr
om GCS, used for logging. | 44 // completedCounter is the number of fuzzes that have been downloaded fr
om GCS, used for logging. |
| 42 completedCounter int32 | 45 completedCounter int32 |
| 43 } | 46 } |
| 44 | 47 |
| 45 // New creates a GSLoader and returns it. | 48 // New creates a GSLoader and returns it. |
| 46 func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader { | 49 func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader { |
| 47 return &GSLoader{ | 50 return &GSLoader{ |
| 48 storageClient: s, | 51 storageClient: s, |
| 49 Cache: c, | 52 Cache: c, |
| 53 deduplicator: deduplicator.New(), |
| 50 } | 54 } |
| 51 } | 55 } |
| 52 | 56 |
| 53 // LoadFreshFromGoogleStorage pulls all fuzzes out of GCS and loads them into me
mory. | 57 // LoadFreshFromGoogleStorage pulls all fuzzes out of GCS and loads them into me
mory. |
| 54 // The "fresh" in the name refers to the fact that all other loaded fuzzes (if a
ny) | 58 // The "fresh" in the name refers to the fact that all other loaded fuzzes (if a
ny) |
| 55 // are written over, including in the cache. | 59 // are written over, including in the cache. |
| 56 // Upon completion, the full results are cached to a boltDB instance and moved f
rom staging | 60 // Upon completion, the full results are cached to a boltDB instance and moved f
rom staging |
| 57 // to the current copy. | 61 // to the current copy. |
| 58 func (g *GSLoader) LoadFreshFromGoogleStorage() error { | 62 func (g *GSLoader) LoadFreshFromGoogleStorage() error { |
| 59 revision := config.FrontEnd.SkiaVersion.Hash | 63 revision := config.FrontEnd.SkiaVersion.Hash |
| 60 data.ClearStaging() | 64 data.ClearStaging() |
| 65 g.deduplicator.Clear() |
| 61 fuzzNames := make([]string, 0, 100) | 66 fuzzNames := make([]string, 0, 100) |
| 62 for _, cat := range common.FUZZ_CATEGORIES { | 67 for _, cat := range common.FUZZ_CATEGORIES { |
| 63 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) | 68 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) |
| 64 reports, err := g.getBinaryReportsFromGS(badPath, cat, nil) | 69 reports, err := g.getBinaryReportsFromGS(badPath, cat, nil) |
| 65 if err != nil { | 70 if err != nil { |
| 66 return err | 71 return err |
| 67 } | 72 } |
| 68 » » n := 0 | 73 » » b := 0 |
| 74 » » d := 0 |
| 69 for report := range reports { | 75 for report := range reports { |
| 70 » » » data.NewFuzzFound(cat, report) | 76 » » » // We always add the fuzzName, to avoid redownloading du
plicates over and over again. |
| 71 fuzzNames = append(fuzzNames, report.FuzzName) | 77 fuzzNames = append(fuzzNames, report.FuzzName) |
| 72 » » » n++ | 78 » » » if g.deduplicator.IsUnique(report) { |
| 79 » » » » data.NewFuzzFound(cat, report) |
| 80 » » » » b++ |
| 81 » » » } else { |
| 82 » » » » d++ |
| 83 » » » } |
| 84 |
| 73 } | 85 } |
| 74 » » glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, co
nfig.GS.Bucket, badPath) | 86 » » glog.Infof("%d bad fuzzes (%d duplicate) freshly loaded from gs:
//%s/%s", b, d, config.GS.Bucket, badPath) |
| 87 » » data.StagingToCurrent() |
| 75 } | 88 } |
| 76 | 89 |
| 77 data.StagingToCurrent() | |
| 78 for _, category := range common.FUZZ_CATEGORIES { | 90 for _, category := range common.FUZZ_CATEGORIES { |
| 79 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { | 91 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { |
| 80 glog.Errorf("Problem storing category %s to boltDB: %s",
category, err) | 92 glog.Errorf("Problem storing category %s to boltDB: %s",
category, err) |
| 81 } | 93 } |
| 82 } | 94 } |
| 83 return g.Cache.StoreFuzzNames(fuzzNames, revision) | 95 return g.Cache.StoreFuzzNames(fuzzNames, revision) |
| 84 } | 96 } |
| 85 | 97 |
| 86 // LoadBinaryFuzzesFromGoogleStorage pulls all fuzzes out of GCS that are on the
given whitelist | 98 // LoadBinaryFuzzesFromGoogleStorage pulls all fuzzes out of GCS that are on the
given whitelist |
| 87 // and loads them into memory (as staging). After loading them, it updates the
cache | 99 // and loads them into memory (as staging). After loading them, it updates the
cache |
| 88 // and moves them from staging to the current copy. | 100 // and moves them from staging to the current copy. |
| 89 func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error { | 101 func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error { |
| 90 revision := config.FrontEnd.SkiaVersion.Hash | 102 revision := config.FrontEnd.SkiaVersion.Hash |
| 91 data.StagingFromCurrent() | 103 data.StagingFromCurrent() |
| 92 sort.Strings(whitelist) | 104 sort.Strings(whitelist) |
| 93 | 105 |
| 94 fuzzNames := make([]string, 0, 100) | 106 fuzzNames := make([]string, 0, 100) |
| 95 for _, cat := range common.FUZZ_CATEGORIES { | 107 for _, cat := range common.FUZZ_CATEGORIES { |
| 96 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) | 108 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) |
| 97 reports, err := g.getBinaryReportsFromGS(badPath, cat, whitelist
) | 109 reports, err := g.getBinaryReportsFromGS(badPath, cat, whitelist
) |
| 98 if err != nil { | 110 if err != nil { |
| 99 return err | 111 return err |
| 100 } | 112 } |
| 101 » » n := 0 | 113 » » b := 0 |
| 114 » » d := 0 |
| 102 for report := range reports { | 115 for report := range reports { |
| 103 » » » data.NewFuzzFound(cat, report) | 116 » » » // We always add the fuzzName, to avoid redownloading du
plicates over and over again. |
| 104 fuzzNames = append(fuzzNames, report.FuzzName) | 117 fuzzNames = append(fuzzNames, report.FuzzName) |
| 105 » » » n++ | 118 » » » if g.deduplicator.IsUnique(report) { |
| 119 » » » » data.NewFuzzFound(cat, report) |
| 120 » » » » b++ |
| 121 » » » } else { |
| 122 » » » » d++ |
| 123 » » » } |
| 106 } | 124 } |
| 107 » » glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, co
nfig.GS.Bucket, badPath) | 125 » » glog.Infof("%d bad fuzzes (%d duplicate) incrementally loaded fr
om gs://%s/%s", b, d, config.GS.Bucket, badPath) |
| 108 } | 126 } |
| 109 data.StagingToCurrent() | 127 data.StagingToCurrent() |
| 110 | 128 |
| 111 oldBinaryFuzzNames, err := g.Cache.LoadFuzzNames(revision) | 129 oldBinaryFuzzNames, err := g.Cache.LoadFuzzNames(revision) |
| 112 if err != nil { | 130 if err != nil { |
| 113 glog.Warningf("Could not read old binary fuzz names from cache.
Continuing...", err) | 131 glog.Warningf("Could not read old binary fuzz names from cache.
Continuing...", err) |
| 114 oldBinaryFuzzNames = []string{} | 132 oldBinaryFuzzNames = []string{} |
| 115 } | 133 } |
| 116 for _, category := range common.FUZZ_CATEGORIES { | 134 for _, category := range common.FUZZ_CATEGORIES { |
| 117 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { | 135 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 234 }, | 252 }, |
| 235 } | 253 } |
| 236 | 254 |
| 237 reports <- data.ParseReport(p) | 255 reports <- data.ParseReport(p) |
| 238 atomic.AddInt32(&g.completedCounter, 1) | 256 atomic.AddInt32(&g.completedCounter, 1) |
| 239 if g.completedCounter%100 == 0 { | 257 if g.completedCounter%100 == 0 { |
| 240 glog.Infof("%d fuzzes downloaded", g.completedCounter) | 258 glog.Infof("%d fuzzes downloaded", g.completedCounter) |
| 241 } | 259 } |
| 242 } | 260 } |
| 243 } | 261 } |
| OLD | NEW |