Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The LUCI Authors. All rights reserved. | |
| 2 // Use of this source code is governed under the Apache License, Version 2.0 | |
| 3 // that can be found in the LICENSE file. | |
| 4 | |
| 5 package main | |
| 6 | |
| 7 // Quick tool for benchmarking different ways of walking and reading a directory tree. | |
|
M-A Ruel
2016/09/15 14:31:03
same
I know it's quick, and it's a tool (it's an
mithro
2016/09/20 12:41:45
Done.
This comment was totally wrong anyway.
| |
| 8 | |
| 9 import ( | |
| 10 "flag" | |
| 11 "fmt" | |
| 12 "io" | |
| 13 "io/ioutil" | |
| 14 "log" | |
| 15 "os" | |
| 16 "runtime" | |
| 17 "sync/atomic" | |
| 18 | |
| 19 "github.com/dustin/go-humanize" | |
| 20 "github.com/luci/luci-go/common/dirwalk" | |
| 21 "github.com/luci/luci-go/common/isolated" | |
| 22 ) | |
| 23 | |
// -method selects which directory-walking implementation to benchmark;
// see the switch in main for the accepted values ("simple", "nostat",
// "parallel").
var method = flag.String("method", "simple", "Method used to walk the tree")
|
M-A Ruel
2016/09/15 14:31:03
Having these inside main(), as I noted in the othe
mithro
2016/09/20 12:41:45
See above.
| |
// Remaining command-line flags controlling what the tool does.
var (
	// -dir is the directory tree to walk; it must exist.
	dir = flag.String("dir", "", "Directory to walk")

	// -do selects the per-file action; see the switch in main for the
	// accepted values ("nothing", "print", "size", "read", "hash", "phash").
	//var do = flags.Choice("do", "null", ["null", "print", "read"])
	do = flag.String("do", "nothing", "Action to perform on the files")

	// Files at or below this size are delivered to SmallFile with their
	// contents already read; anything larger goes to LargeFile.
	smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a small file")

	// -repeat runs the whole walk several times (e.g. to observe cache effects).
	repeat = flag.Int("repeat", 1, "Repeat the walk x times")

	// Upper bound on the number of hashing workers used by -do=phash.
	maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use.")
)
| 33 | |
| 34 // Walker which does nothing but count the files of each type | |
|
M-A Ruel
2016/09/15 14:31:03
// NullWalker implements Walker. It only count the
mithro
2016/09/20 12:41:45
Done.
| |
// NullWalker implements dirwalk.WalkObserver. It only counts how many
// small and large files were seen and otherwise does nothing.
//
// The counters are uint64 rather than int because they are bumped with
// sync/atomic's AddUint64, so callbacks may arrive from concurrent
// goroutines.
type NullWalker struct {
	smallfiles uint64 // files at or below -smallfilesize
	largefiles uint64 // files above -smallfilesize
}

// SmallFile counts one small file; the already-read contents are ignored.
func (n *NullWalker) SmallFile(filename string, alldata []byte) {
	atomic.AddUint64(&n.smallfiles, 1)
}

// LargeFile counts one large file.
func (n *NullWalker) LargeFile(filename string) {
	atomic.AddUint64(&n.largefiles, 1)
}

// Error aborts the process on any walk error.
func (n *NullWalker) Error(pathname string, err error) {
	log.Fatalf("%s:%s", pathname, err)
}

// Finished is called when the walk completes; there is nothing to do.
func (n *NullWalker) Finished() {}
| 51 | |
| 52 // Walker which just prints the filenames of everything | |
|
M-A Ruel
2016/09/15 14:31:03
Same for the rest
mithro
2016/09/20 12:41:45
Done.
| |
| 53 type PrintWalker struct { | |
| 54 NullWalker | |
| 55 obuf io.Writer | |
| 56 } | |
| 57 | |
| 58 func (p *PrintWalker) PrintFile(filename string) { | |
| 59 fmt.Fprintln(p.obuf, filename) | |
| 60 } | |
| 61 func (p *PrintWalker) SmallFile(filename string, alldata []byte) { | |
| 62 p.NullWalker.SmallFile(filename, alldata) | |
| 63 p.PrintFile(filename) | |
| 64 } | |
| 65 func (p *PrintWalker) LargeFile(filename string) { | |
| 66 p.NullWalker.LargeFile(filename) | |
| 67 p.PrintFile(filename) | |
| 68 } | |
| 69 | |
| 70 // Walker which prints the size of everything | |
| 71 type SizeWalker struct { | |
| 72 NullWalker | |
| 73 obuf io.Writer | |
| 74 } | |
| 75 | |
| 76 func (s *SizeWalker) SizeFile(filename string, size int64) { | |
| 77 fmt.Fprintf(s.obuf, "%s: %d\n", filename, size) | |
| 78 } | |
| 79 func (s *SizeWalker) SmallFile(filename string, alldata []byte) { | |
| 80 s.NullWalker.SmallFile(filename, alldata) | |
| 81 s.SizeFile(filename, int64(len(alldata))) | |
| 82 } | |
| 83 func (s *SizeWalker) LargeFile(filename string) { | |
| 84 s.NullWalker.LargeFile(filename) | |
| 85 stat, err := os.Stat(filename) | |
| 86 if err != nil { | |
| 87 s.Error(filename, err) | |
| 88 } else { | |
| 89 s.SizeFile(filename, stat.Size()) | |
| 90 } | |
| 91 } | |
| 92 | |
| 93 // Walker which reads the whole file | |
| 94 type ReadWalker struct { | |
| 95 NullWalker | |
| 96 } | |
| 97 | |
| 98 func (r *ReadWalker) SmallFile(filename string, alldata []byte) { | |
| 99 r.NullWalker.SmallFile(filename, alldata) | |
| 100 } | |
| 101 func (r *ReadWalker) LargeFile(filename string) { | |
| 102 r.NullWalker.LargeFile(filename) | |
| 103 _, err := ioutil.ReadFile(filename) | |
| 104 if err != nil { | |
| 105 r.Error(filename, err) | |
| 106 } | |
| 107 } | |
| 108 | |
| 109 // Walker which hashes all the files | |
| 110 type HashWalker struct { | |
| 111 NullWalker | |
| 112 obuf io.Writer | |
| 113 } | |
| 114 | |
| 115 func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) { | |
| 116 fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest) | |
| 117 } | |
| 118 func (h *HashWalker) SmallFile(filename string, alldata []byte) { | |
| 119 h.NullWalker.SmallFile(filename, alldata) | |
| 120 h.HashedFile(filename, isolated.HashBytes(alldata)) | |
| 121 } | |
| 122 func (h *HashWalker) LargeFile(filename string) { | |
| 123 h.NullWalker.LargeFile(filename) | |
| 124 d, _ := isolated.HashFile(filename) | |
| 125 h.HashedFile(filename, isolated.HexDigest(d.Digest)) | |
| 126 } | |
| 127 | |
| 128 // Walker which hashes using a worker tool | |
| 129 type ToHash struct { | |
| 130 filename string | |
| 131 hasdata bool | |
| 132 data []byte | |
| 133 } | |
| 134 type ParallelHashWalker struct { | |
| 135 NullWalker | |
| 136 obuf io.Writer | |
| 137 workers int | |
| 138 queue *chan ToHash | |
| 139 finished chan bool | |
| 140 } | |
| 141 | |
| 142 func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, fin ished chan<- bool) { | |
| 143 fmt.Fprintf(obuf, "Starting hash worker %d\n", name) | |
| 144 | |
| 145 var filecount uint64 = 0 | |
| 146 var bytecount uint64 = 0 | |
| 147 for tohash := range queue { | |
| 148 filecount += 1 | |
| 149 | |
| 150 var digest isolated.HexDigest | |
| 151 if tohash.hasdata { | |
| 152 bytecount += uint64(len(tohash.data)) | |
| 153 digest = isolated.HashBytes(tohash.data) | |
| 154 } else { | |
| 155 d, _ := isolated.HashFile(tohash.filename) | |
| 156 bytecount += uint64(d.Size) | |
| 157 digest = isolated.HexDigest(d.Digest) | |
| 158 } | |
| 159 fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest) | |
| 160 } | |
| 161 fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", nam e, filecount, humanize.Bytes(bytecount)) | |
| 162 finished <- true | |
| 163 } | |
| 164 func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker { | |
| 165 var max int = *maxworkers | |
| 166 | |
| 167 maxProcs := runtime.GOMAXPROCS(0) | |
| 168 if maxProcs < max { | |
| 169 max = maxProcs | |
| 170 } | |
| 171 | |
| 172 numCPU := runtime.NumCPU() | |
| 173 if numCPU < maxProcs { | |
| 174 max = numCPU | |
| 175 } | |
| 176 | |
| 177 if max < *maxworkers { | |
| 178 // FIXME: Warn | |
| 179 } | |
| 180 | |
| 181 h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bo ol)} | |
| 182 return &h | |
| 183 } | |
| 184 func (h *ParallelHashWalker) Init() { | |
| 185 if h.queue == nil { | |
| 186 q := make(chan ToHash, h.workers) | |
| 187 h.queue = &q | |
| 188 for i := 0; i < h.workers; i++ { | |
| 189 go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finis hed) | |
| 190 } | |
| 191 } | |
| 192 } | |
| 193 func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) { | |
| 194 h.NullWalker.SmallFile(filename, alldata) | |
| 195 h.Init() | |
| 196 *h.queue <- ToHash{filename: filename, hasdata: true, data: alldata} | |
| 197 } | |
| 198 func (h *ParallelHashWalker) LargeFile(filename string) { | |
| 199 h.NullWalker.LargeFile(filename) | |
| 200 h.Init() | |
| 201 *h.queue <- ToHash{filename: filename, hasdata: false} | |
| 202 } | |
| 203 func (h *ParallelHashWalker) Finished() { | |
| 204 h.Init() | |
| 205 close(*h.queue) | |
| 206 for i := 0; i < h.workers; i++ { | |
| 207 <-h.finished | |
| 208 } | |
| 209 fmt.Fprintln(h.obuf, "All workers finished.") | |
| 210 h.queue = nil | |
| 211 } | |
| 212 | |
| 213 func main() { | |
| 214 flag.Parse() | |
| 215 | |
| 216 if _, err := os.Stat(*dir); err != nil { | |
| 217 log.Fatalf("Directory not found: %s", err) | |
| 218 } | |
| 219 | |
| 220 var stats *NullWalker | |
| 221 var obs dirwalk.WalkObserver | |
| 222 switch *do { | |
| 223 case "nothing": | |
| 224 o := &NullWalker{} | |
| 225 stats = o | |
| 226 obs = o | |
| 227 case "print": | |
| 228 o := &PrintWalker{obuf: os.Stderr} | |
| 229 stats = &o.NullWalker | |
| 230 obs = o | |
| 231 case "size": | |
| 232 o := &SizeWalker{obuf: os.Stderr} | |
| 233 stats = &o.NullWalker | |
| 234 obs = o | |
| 235 case "read": | |
| 236 o := &ReadWalker{} | |
| 237 stats = &o.NullWalker | |
| 238 obs = o | |
| 239 case "hash": | |
| 240 o := &HashWalker{obuf: os.Stderr} | |
| 241 stats = &o.NullWalker | |
| 242 obs = o | |
| 243 case "phash": | |
| 244 o := CreateParallelHashWalker(os.Stderr) | |
| 245 stats = &o.NullWalker | |
| 246 obs = o | |
| 247 default: | |
| 248 log.Fatalf("Invalid action '%s'", *do) | |
| 249 } | |
| 250 | |
| 251 for i := 0; i < *repeat; i++ { | |
| 252 stats.smallfiles = 0 | |
| 253 stats.largefiles = 0 | |
| 254 | |
| 255 switch *method { | |
| 256 case "simple": | |
| 257 dirwalk.WalkBasic(*dir, *smallfilesize, obs) | |
| 258 case "nostat": | |
| 259 dirwalk.WalkNoStat(*dir, *smallfilesize, obs) | |
| 260 case "parallel": | |
| 261 dirwalk.WalkParallel(*dir, *smallfilesize, obs) | |
| 262 default: | |
| 263 log.Fatalf("Invalid walk method '%s'", *method) | |
| 264 } | |
| 265 fmt.Printf("Found %d small files and %d large files\n", stats.sm allfiles, stats.largefiles) | |
| 266 } | |
| 267 fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stat s.smallfiles, stats.largefiles) | |
| 268 } | |
| OLD | NEW |