OLD | NEW |
1 package gsloader | 1 package gsloader |
2 | 2 |
3 import ( | 3 import ( |
4 "fmt" | 4 "fmt" |
5 "sort" | 5 "sort" |
6 "sync" | 6 "sync" |
7 "sync/atomic" | 7 "sync/atomic" |
8 | 8 |
9 "github.com/skia-dev/glog" | 9 "github.com/skia-dev/glog" |
10 "go.skia.org/infra/fuzzer/go/common" | 10 "go.skia.org/infra/fuzzer/go/common" |
11 "go.skia.org/infra/fuzzer/go/config" | 11 "go.skia.org/infra/fuzzer/go/config" |
| 12 "go.skia.org/infra/fuzzer/go/deduplicator" |
12 "go.skia.org/infra/fuzzer/go/frontend/data" | 13 "go.skia.org/infra/fuzzer/go/frontend/data" |
13 "go.skia.org/infra/fuzzer/go/fuzzcache" | 14 "go.skia.org/infra/fuzzer/go/fuzzcache" |
14 "go.skia.org/infra/go/gs" | 15 "go.skia.org/infra/go/gs" |
15 "google.golang.org/cloud/storage" | 16 "google.golang.org/cloud/storage" |
16 ) | 17 ) |
17 | 18 |
18 // LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with
the given hash. | 19 // LoadFromBoltDB loads the data.FuzzReport from FuzzReportCache associated with
the given hash. |
19 // The FuzzReport is first put into the staging fuzz cache, and then into the cu
rrent. | 20 // The FuzzReport is first put into the staging fuzz cache, and then into the cu
rrent. |
20 // If a cache for the commit does not exist, or there are other problems with th
e retrieval, | 21 // If a cache for the commit does not exist, or there are other problems with th
e retrieval, |
21 // an error is returned. | 22 // an error is returned. We do not need to deduplicate on extraction because |
| 23 // the fuzzes were deduplicated on storage. |
22 func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error { | 24 func LoadFromBoltDB(cache *fuzzcache.FuzzReportCache) error { |
23 glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVer
sion.Hash) | 25 glog.Infof("Looking into cache for revision %s", config.FrontEnd.SkiaVer
sion.Hash) |
24 for _, category := range common.FUZZ_CATEGORIES { | 26 for _, category := range common.FUZZ_CATEGORIES { |
25 if staging, err := cache.LoadTree(category, config.FrontEnd.Skia
Version.Hash); err != nil { | 27 if staging, err := cache.LoadTree(category, config.FrontEnd.Skia
Version.Hash); err != nil { |
26 return fmt.Errorf("Problem decoding existing from bolt d
b: %s", err) | 28 return fmt.Errorf("Problem decoding existing from bolt d
b: %s", err) |
27 } else { | 29 } else { |
28 data.SetStaging(category, *staging) | 30 data.SetStaging(category, *staging) |
29 glog.Infof("Successfully loaded %s fuzzes from bolt db c
ache", category) | 31 glog.Infof("Successfully loaded %s fuzzes from bolt db c
ache", category) |
30 } | 32 } |
31 } | 33 } |
32 data.StagingToCurrent() | 34 data.StagingToCurrent() |
33 return nil | 35 return nil |
34 } | 36 } |
35 | 37 |
36 // GSLoader is a struct that handles downloading fuzzes from Google Storage. | 38 // GSLoader is a struct that handles downloading fuzzes from Google Storage. |
37 type GSLoader struct { | 39 type GSLoader struct { |
38 storageClient *storage.Client | 40 storageClient *storage.Client |
39 Cache *fuzzcache.FuzzReportCache | 41 Cache *fuzzcache.FuzzReportCache |
| 42 deduplicator *deduplicator.Deduplicator |
40 | 43 |
41 // completedCounter is the number of fuzzes that have been downloaded fr
om GCS, used for logging. | 44 // completedCounter is the number of fuzzes that have been downloaded fr
om GCS, used for logging. |
42 completedCounter int32 | 45 completedCounter int32 |
43 } | 46 } |
44 | 47 |
45 // New creates a GSLoader and returns it. | 48 // New creates a GSLoader and returns it. |
46 func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader { | 49 func New(s *storage.Client, c *fuzzcache.FuzzReportCache) *GSLoader { |
47 return &GSLoader{ | 50 return &GSLoader{ |
48 storageClient: s, | 51 storageClient: s, |
49 Cache: c, | 52 Cache: c, |
| 53 deduplicator: deduplicator.New(), |
50 } | 54 } |
51 } | 55 } |
52 | 56 |
53 // LoadFreshFromGoogleStorage pulls all fuzzes out of GCS and loads them into me
mory. | 57 // LoadFreshFromGoogleStorage pulls all fuzzes out of GCS and loads them into me
mory. |
54 // The "fresh" in the name refers to the fact that all other loaded fuzzes (if a
ny) | 58 // The "fresh" in the name refers to the fact that all other loaded fuzzes (if a
ny) |
55 // are written over, including in the cache. | 59 // are written over, including in the cache. |
56 // Upon completion, the full results are cached to a boltDB instance and moved f
rom staging | 60 // Upon completion, the full results are cached to a boltDB instance and moved f
rom staging |
57 // to the current copy. | 61 // to the current copy. |
58 func (g *GSLoader) LoadFreshFromGoogleStorage() error { | 62 func (g *GSLoader) LoadFreshFromGoogleStorage() error { |
59 revision := config.FrontEnd.SkiaVersion.Hash | 63 revision := config.FrontEnd.SkiaVersion.Hash |
60 data.ClearStaging() | 64 data.ClearStaging() |
| 65 g.deduplicator.Clear() |
61 fuzzNames := make([]string, 0, 100) | 66 fuzzNames := make([]string, 0, 100) |
62 for _, cat := range common.FUZZ_CATEGORIES { | 67 for _, cat := range common.FUZZ_CATEGORIES { |
63 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) | 68 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) |
64 reports, err := g.getBinaryReportsFromGS(badPath, cat, nil) | 69 reports, err := g.getBinaryReportsFromGS(badPath, cat, nil) |
65 if err != nil { | 70 if err != nil { |
66 return err | 71 return err |
67 } | 72 } |
68 » » n := 0 | 73 » » b := 0 |
| 74 » » d := 0 |
69 for report := range reports { | 75 for report := range reports { |
70 » » » data.NewFuzzFound(cat, report) | 76 » » » // We always add the fuzzName, to avoid redownloading du
plicates over and over again. |
71 fuzzNames = append(fuzzNames, report.FuzzName) | 77 fuzzNames = append(fuzzNames, report.FuzzName) |
72 » » » n++ | 78 » » » if g.deduplicator.IsUnique(report) { |
| 79 » » » » data.NewFuzzFound(cat, report) |
| 80 » » » » b++ |
| 81 » » » } else { |
| 82 » » » » d++ |
| 83 » » » } |
| 84 |
73 } | 85 } |
74 » » glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, co
nfig.GS.Bucket, badPath) | 86 » » glog.Infof("%d bad fuzzes (%d duplicate) freshly loaded from gs:
//%s/%s", b, d, config.GS.Bucket, badPath) |
| 87 » » data.StagingToCurrent() |
75 } | 88 } |
76 | 89 |
77 data.StagingToCurrent() | |
78 for _, category := range common.FUZZ_CATEGORIES { | 90 for _, category := range common.FUZZ_CATEGORIES { |
79 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { | 91 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { |
80 glog.Errorf("Problem storing category %s to boltDB: %s",
category, err) | 92 glog.Errorf("Problem storing category %s to boltDB: %s",
category, err) |
81 } | 93 } |
82 } | 94 } |
83 return g.Cache.StoreFuzzNames(fuzzNames, revision) | 95 return g.Cache.StoreFuzzNames(fuzzNames, revision) |
84 } | 96 } |
85 | 97 |
86 // LoadBinaryFuzzesFromGoogleStorage pulls all fuzzes out of GCS that are on the
given whitelist | 98 // LoadBinaryFuzzesFromGoogleStorage pulls all fuzzes out of GCS that are on the
given whitelist |
87 // and loads them into memory (as staging). After loading them, it updates the
cache | 99 // and loads them into memory (as staging). After loading them, it updates the
cache |
88 // and moves them from staging to the current copy. | 100 // and moves them from staging to the current copy. |
89 func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error { | 101 func (g *GSLoader) LoadBinaryFuzzesFromGoogleStorage(whitelist []string) error { |
90 revision := config.FrontEnd.SkiaVersion.Hash | 102 revision := config.FrontEnd.SkiaVersion.Hash |
91 data.StagingFromCurrent() | 103 data.StagingFromCurrent() |
92 sort.Strings(whitelist) | 104 sort.Strings(whitelist) |
93 | 105 |
94 fuzzNames := make([]string, 0, 100) | 106 fuzzNames := make([]string, 0, 100) |
95 for _, cat := range common.FUZZ_CATEGORIES { | 107 for _, cat := range common.FUZZ_CATEGORIES { |
96 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) | 108 badPath := fmt.Sprintf("%s/%s/bad", cat, revision) |
97 reports, err := g.getBinaryReportsFromGS(badPath, cat, whitelist
) | 109 reports, err := g.getBinaryReportsFromGS(badPath, cat, whitelist
) |
98 if err != nil { | 110 if err != nil { |
99 return err | 111 return err |
100 } | 112 } |
101 » » n := 0 | 113 » » b := 0 |
| 114 » » d := 0 |
102 for report := range reports { | 115 for report := range reports { |
103 » » » data.NewFuzzFound(cat, report) | 116 » » » // We always add the fuzzName, to avoid redownloading du
plicates over and over again. |
104 fuzzNames = append(fuzzNames, report.FuzzName) | 117 fuzzNames = append(fuzzNames, report.FuzzName) |
105 » » » n++ | 118 » » » if g.deduplicator.IsUnique(report) { |
| 119 » » » » data.NewFuzzFound(cat, report) |
| 120 » » » » b++ |
| 121 » » » } else { |
| 122 » » » » d++ |
| 123 » » » } |
106 } | 124 } |
107 » » glog.Infof("%d bad fuzzes freshly loaded from gs://%s/%s", n, co
nfig.GS.Bucket, badPath) | 125 » » glog.Infof("%d bad fuzzes (%d duplicate) incrementally loaded fr
om gs://%s/%s", b, d, config.GS.Bucket, badPath) |
108 } | 126 } |
109 data.StagingToCurrent() | 127 data.StagingToCurrent() |
110 | 128 |
111 oldBinaryFuzzNames, err := g.Cache.LoadFuzzNames(revision) | 129 oldBinaryFuzzNames, err := g.Cache.LoadFuzzNames(revision) |
112 if err != nil { | 130 if err != nil { |
113 glog.Warningf("Could not read old binary fuzz names from cache.
Continuing...", err) | 131 glog.Warningf("Could not read old binary fuzz names from cache.
Continuing...", err) |
114 oldBinaryFuzzNames = []string{} | 132 oldBinaryFuzzNames = []string{} |
115 } | 133 } |
116 for _, category := range common.FUZZ_CATEGORIES { | 134 for _, category := range common.FUZZ_CATEGORIES { |
117 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { | 135 if err := g.Cache.StoreTree(data.StagingCopy(category), category
, revision); err != nil { |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
234 }, | 252 }, |
235 } | 253 } |
236 | 254 |
237 reports <- data.ParseReport(p) | 255 reports <- data.ParseReport(p) |
238 atomic.AddInt32(&g.completedCounter, 1) | 256 atomic.AddInt32(&g.completedCounter, 1) |
239 if g.completedCounter%100 == 0 { | 257 if g.completedCounter%100 == 0 { |
240 glog.Infof("%d fuzzes downloaded", g.completedCounter) | 258 glog.Infof("%d fuzzes downloaded", g.completedCounter) |
241 } | 259 } |
242 } | 260 } |
243 } | 261 } |
OLD | NEW |