OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 // Use of this source code is governed under the Apache License, Version 2.0 |
| 3 // that can be found in the LICENSE file. |
| 4 |
| 5 package main |
| 6 |
| 7 // Walk a given directory and perform an action on the files found. |
| 8 |
| 9 import ( |
| 10 "flag" |
| 11 "fmt" |
| 12 "io" |
| 13 "io/ioutil" |
| 14 "log" |
| 15 "os" |
| 16 "runtime" |
| 17 "sync/atomic" |
| 18 |
| 19 "github.com/dustin/go-humanize" |
| 20 "github.com/luci/luci-go/common/dirwalk" |
| 21 "github.com/luci/luci-go/common/isolated" |
| 22 ) |
| 23 |
| 24 var method = flag.String("method", "simple", "Method used to walk the tree") |
| 25 var dir = flag.String("dir", "", "Directory to walk") |
| 26 |
| 27 //var do = flags.Choice("do", "null", ["null", "print", "read"]) |
| 28 var do = flag.String("do", "nothing", "Action to perform on the files") |
| 29 var smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a sma
ll file") |
| 30 var repeat = flag.Int("repeat", 1, "Repeat the walk x times") |
| 31 |
| 32 var maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use."
) |
| 33 |
// NullWalker implements Walker. It counts the number of files of each type
// and performs no other action. The counters are updated with sync/atomic,
// so the walker is safe to use from concurrent walk implementations.
type NullWalker struct {
	smallfiles uint64 // count of files reported via SmallFile; updated atomically
	largefiles uint64 // count of files reported via LargeFile; updated atomically
}
| 39 |
// SmallFile counts a file whose contents were delivered by the walk; the
// data itself is discarded.
func (n *NullWalker) SmallFile(filename string, alldata []byte) {
	atomic.AddUint64(&n.smallfiles, 1)
}
// LargeFile counts a file too large to be delivered inline by the walk.
func (n *NullWalker) LargeFile(filename string) {
	atomic.AddUint64(&n.largefiles, 1)
}
// Error handles a walk error by aborting the whole process.
func (n *NullWalker) Error(pathname string, err error) {
	log.Fatalf("%s:%s", pathname, err)
}
// Finished is called when the walk completes; counting needs no cleanup.
func (n *NullWalker) Finished() {
}
| 51 |
// PrintWalker implements Walker. It prints the filename of each found file.
type PrintWalker struct {
	NullWalker           // keeps the small/large file counters up to date
	obuf       io.Writer // destination for the printed filenames
}
| 57 |
// PrintFile writes one filename per line to p.obuf.
func (p *PrintWalker) PrintFile(filename string) {
	fmt.Fprintln(p.obuf, filename)
}
// SmallFile counts the file via the embedded NullWalker, then prints its name.
func (p *PrintWalker) SmallFile(filename string, alldata []byte) {
	p.NullWalker.SmallFile(filename, alldata)
	p.PrintFile(filename)
}
// LargeFile counts the file via the embedded NullWalker, then prints its name.
func (p *PrintWalker) LargeFile(filename string) {
	p.NullWalker.LargeFile(filename)
	p.PrintFile(filename)
}
| 69 |
// SizeWalker implements Walker. It prints the filename and file size of each
// found file.
type SizeWalker struct {
	NullWalker           // keeps the small/large file counters up to date
	obuf       io.Writer // destination for the "name: size" lines
}
| 76 |
// SizeFile reports a filename together with its size in bytes.
func (s *SizeWalker) SizeFile(filename string, size int64) {
	fmt.Fprintf(s.obuf, "%s: %d\n", filename, size)
}
// SmallFile counts the file and reports its size; for small files the size
// is just the length of the data the walk already read.
func (s *SizeWalker) SmallFile(filename string, alldata []byte) {
	s.NullWalker.SmallFile(filename, alldata)
	s.SizeFile(filename, int64(len(alldata)))
}
| 84 func (s *SizeWalker) LargeFile(filename string) { |
| 85 s.NullWalker.LargeFile(filename) |
| 86 stat, err := os.Stat(filename) |
| 87 if err != nil { |
| 88 s.Error(filename, err) |
| 89 } else { |
| 90 s.SizeFile(filename, stat.Size()) |
| 91 } |
| 92 } |
| 93 |
// ReadWalker implements Walker. It reads the contents of each found file and
// discards them, measuring pure read throughput.
type ReadWalker struct {
	NullWalker // keeps the small/large file counters up to date
}
| 98 |
// SmallFile only counts the file: the walk has already read the contents,
// so there is nothing more to do.
func (r *ReadWalker) SmallFile(filename string, alldata []byte) {
	r.NullWalker.SmallFile(filename, alldata)
}
| 102 func (r *ReadWalker) LargeFile(filename string) { |
| 103 r.NullWalker.LargeFile(filename) |
| 104 _, err := ioutil.ReadFile(filename) |
| 105 if err != nil { |
| 106 r.Error(filename, err) |
| 107 } |
| 108 } |
| 109 |
// HashWalker implements Walker. It generates a hash for the contents of each
// found file and prints the resulting digest.
type HashWalker struct {
	NullWalker           // keeps the small/large file counters up to date
	obuf       io.Writer // destination for the "name: digest" lines
}
| 116 |
// HashedFile reports a filename together with the digest of its contents.
func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) {
	fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest)
}
// SmallFile counts the file and hashes the contents the walk already read.
func (h *HashWalker) SmallFile(filename string, alldata []byte) {
	h.NullWalker.SmallFile(filename, alldata)
	h.HashedFile(filename, isolated.HashBytes(alldata))
}
| 124 func (h *HashWalker) LargeFile(filename string) { |
| 125 h.NullWalker.LargeFile(filename) |
| 126 d, _ := isolated.HashFile(filename) |
| 127 h.HashedFile(filename, isolated.HexDigest(d.Digest)) |
| 128 } |
| 129 |
// ToHash is one unit of work for ParallelHashWalker's workers: a file to
// hash, with its contents carried inline when the walk already read them.
type ToHash struct {
	filename string
	hasdata  bool   // true when data holds the full file contents
	data     []byte // file contents; only meaningful when hasdata is true
}
| 136 |
// ParallelHashWalker implements Walker. It generates a hash for the contents
// of each found file using multiple threads.
type ParallelHashWalker struct {
	NullWalker
	obuf     io.Writer    // destination for per-file digest lines and worker logs
	workers  int          // number of hashing goroutines to start
	queue    *chan ToHash // work queue; nil until Init runs, reset to nil by Finished
	finished chan bool    // each worker sends one value here when it exits
}
| 146 |
| 147 func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, fin
ished chan<- bool) { |
| 148 fmt.Fprintf(obuf, "Starting hash worker %d\n", name) |
| 149 |
| 150 var filecount uint64 = 0 |
| 151 var bytecount uint64 = 0 |
| 152 for tohash := range queue { |
| 153 filecount += 1 |
| 154 |
| 155 var digest isolated.HexDigest |
| 156 if tohash.hasdata { |
| 157 bytecount += uint64(len(tohash.data)) |
| 158 digest = isolated.HashBytes(tohash.data) |
| 159 } else { |
| 160 d, _ := isolated.HashFile(tohash.filename) |
| 161 bytecount += uint64(d.Size) |
| 162 digest = isolated.HexDigest(d.Digest) |
| 163 } |
| 164 fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest) |
| 165 } |
| 166 fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", nam
e, filecount, humanize.Bytes(bytecount)) |
| 167 finished <- true |
| 168 } |
| 169 func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker { |
| 170 var max int = *maxworkers |
| 171 |
| 172 maxProcs := runtime.GOMAXPROCS(0) |
| 173 if maxProcs < max { |
| 174 max = maxProcs |
| 175 } |
| 176 |
| 177 numCPU := runtime.NumCPU() |
| 178 if numCPU < maxProcs { |
| 179 max = numCPU |
| 180 } |
| 181 |
| 182 if max < *maxworkers { |
| 183 // FIXME: Warn |
| 184 } |
| 185 |
| 186 h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bo
ol)} |
| 187 return &h |
| 188 } |
// Init lazily creates the work queue and starts the hashing workers; it is a
// no-op once the queue exists. Finished resets the queue to nil so the next
// walk re-initializes.
// NOTE(review): the nil check is unsynchronized — this assumes the first
// SmallFile/LargeFile call does not race with another goroutine; confirm
// against the dirwalk implementations used (WalkParallel in particular).
func (h *ParallelHashWalker) Init() {
	if h.queue == nil {
		q := make(chan ToHash, h.workers)
		h.queue = &q
		for i := 0; i < h.workers; i++ {
			go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finished)
		}
	}
}
// SmallFile counts the file and queues its already-read contents for hashing.
func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) {
	h.NullWalker.SmallFile(filename, alldata)
	h.Init()
	*h.queue <- ToHash{filename: filename, hasdata: true, data: alldata}
}
// LargeFile counts the file and queues its name; the worker reads and hashes
// the contents from disk.
func (h *ParallelHashWalker) LargeFile(filename string) {
	h.NullWalker.LargeFile(filename)
	h.Init()
	*h.queue <- ToHash{filename: filename, hasdata: false}
}
// Finished closes the work queue, waits for every worker to drain it and
// exit, then resets the queue so the walker can be reused for another pass.
// The order matters: close before waiting, reset only after all workers are
// done.
func (h *ParallelHashWalker) Finished() {
	h.Init() // ensure the queue exists even if the walk found no files
	close(*h.queue)
	for i := 0; i < h.workers; i++ {
		<-h.finished
	}
	fmt.Fprintln(h.obuf, "All workers finished.")
	h.queue = nil
}
| 217 |
| 218 func main() { |
| 219 flag.Parse() |
| 220 |
| 221 if _, err := os.Stat(*dir); err != nil { |
| 222 log.Fatalf("Directory not found: %s", err) |
| 223 } |
| 224 |
| 225 var stats *NullWalker |
| 226 var obs dirwalk.WalkObserver |
| 227 switch *do { |
| 228 case "nothing": |
| 229 o := &NullWalker{} |
| 230 stats = o |
| 231 obs = o |
| 232 case "print": |
| 233 o := &PrintWalker{obuf: os.Stderr} |
| 234 stats = &o.NullWalker |
| 235 obs = o |
| 236 case "size": |
| 237 o := &SizeWalker{obuf: os.Stderr} |
| 238 stats = &o.NullWalker |
| 239 obs = o |
| 240 case "read": |
| 241 o := &ReadWalker{} |
| 242 stats = &o.NullWalker |
| 243 obs = o |
| 244 case "hash": |
| 245 o := &HashWalker{obuf: os.Stderr} |
| 246 stats = &o.NullWalker |
| 247 obs = o |
| 248 case "phash": |
| 249 o := CreateParallelHashWalker(os.Stderr) |
| 250 stats = &o.NullWalker |
| 251 obs = o |
| 252 default: |
| 253 log.Fatalf("Invalid action '%s'", *do) |
| 254 } |
| 255 |
| 256 for i := 0; i < *repeat; i++ { |
| 257 stats.smallfiles = 0 |
| 258 stats.largefiles = 0 |
| 259 |
| 260 switch *method { |
| 261 case "simple": |
| 262 dirwalk.WalkBasic(*dir, *smallfilesize, obs) |
| 263 case "nostat": |
| 264 dirwalk.WalkNoStat(*dir, *smallfilesize, obs) |
| 265 case "parallel": |
| 266 dirwalk.WalkParallel(*dir, *smallfilesize, obs) |
| 267 default: |
| 268 log.Fatalf("Invalid walk method '%s'", *method) |
| 269 } |
| 270 fmt.Printf("Found %d small files and %d large files\n", stats.sm
allfiles, stats.largefiles) |
| 271 } |
| 272 fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stat
s.smallfiles, stats.largefiles) |
| 273 } |
OLD | NEW |