Chromium Code Reviews| Index: common/dirwalk/tests/tools/walkdir/main.go |
| diff --git a/common/dirwalk/tests/tools/walkdir/main.go b/common/dirwalk/tests/tools/walkdir/main.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..fc13e1f6e6318f9cb46488461c72e88585fc11db |
| --- /dev/null |
| +++ b/common/dirwalk/tests/tools/walkdir/main.go |
| @@ -0,0 +1,268 @@ |
| +// Copyright 2016 The LUCI Authors. All rights reserved. |
| +// Use of this source code is governed under the Apache License, Version 2.0 |
| +// that can be found in the LICENSE file. |
| + |
| +package main |
| + |
| +// Quick tool for generating directories to walk. |
|
M-A Ruel
2016/09/15 14:31:03
same
I know it's quick, and it's a tool (it's an
mithro
2016/09/20 12:41:45
Done.
This comment was totally wrong anyway.
|
| + |
| +import ( |
| + "flag" |
| + "fmt" |
| + "io" |
| + "io/ioutil" |
| + "log" |
| + "os" |
| + "runtime" |
| + "sync/atomic" |
| + |
| + "github.com/dustin/go-humanize" |
| + "github.com/luci/luci-go/common/dirwalk" |
| + "github.com/luci/luci-go/common/isolated" |
| +) |
| + |
| +var method = flag.String("method", "simple", "Method used to walk the tree") |
|
M-A Ruel
2016/09/15 14:31:03
Having these inside main(), as I noted in the othe
mithro
2016/09/20 12:41:45
See above.
|
| +var dir = flag.String("dir", "", "Directory to walk") |
| + |
| +//var do = flags.Choice("do", "null", ["null", "print", "read"]) |
| +var do = flag.String("do", "nothing", "Action to perform on the files") |
| +var smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a small file") |
| +var repeat = flag.Int("repeat", 1, "Repeat the walk x times") |
| + |
| +var maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use.") |
| + |
| +// Walker which does nothing but count the files of each type |
|
M-A Ruel
2016/09/15 14:31:03
// NullWalker implements Walker. It only count the
mithro
2016/09/20 12:41:45
Done.
|
| +type NullWalker struct { |
| + smallfiles uint64 |
|
M-A Ruel
2016/09/15 14:31:03
does this need to be uint64 or int would do? I thi
mithro
2016/09/20 12:41:45
Well, it can't be negative?
|
| + largefiles uint64 |
| +} |
| + |
| +func (n *NullWalker) SmallFile(filename string, alldata []byte) { |
| + atomic.AddUint64(&n.smallfiles, 1) |
| +} |
| +func (n *NullWalker) LargeFile(filename string) { |
| + atomic.AddUint64(&n.largefiles, 1) |
| +} |
| +func (n *NullWalker) Error(pathname string, err error) { |
| + log.Fatalf("%s:%s", pathname, err) |
| +} |
| +func (n *NullWalker) Finished() { |
| +} |
| + |
| +// Walker which just prints the filenames of everything |
|
M-A Ruel
2016/09/15 14:31:03
Same for the rest
mithro
2016/09/20 12:41:45
Done.
|
| +type PrintWalker struct { |
| + NullWalker |
| + obuf io.Writer |
| +} |
| + |
| +func (p *PrintWalker) PrintFile(filename string) { |
| + fmt.Fprintln(p.obuf, filename) |
| +} |
| +func (p *PrintWalker) SmallFile(filename string, alldata []byte) { |
| + p.NullWalker.SmallFile(filename, alldata) |
| + p.PrintFile(filename) |
| +} |
| +func (p *PrintWalker) LargeFile(filename string) { |
| + p.NullWalker.LargeFile(filename) |
| + p.PrintFile(filename) |
| +} |
| + |
| +// Walker which prints the size of everything |
| +type SizeWalker struct { |
| + NullWalker |
| + obuf io.Writer |
| +} |
| + |
| +func (s *SizeWalker) SizeFile(filename string, size int64) { |
| + fmt.Fprintf(s.obuf, "%s: %d\n", filename, size) |
| +} |
| +func (s *SizeWalker) SmallFile(filename string, alldata []byte) { |
| + s.NullWalker.SmallFile(filename, alldata) |
| + s.SizeFile(filename, int64(len(alldata))) |
| +} |
| +func (s *SizeWalker) LargeFile(filename string) { |
| + s.NullWalker.LargeFile(filename) |
| + stat, err := os.Stat(filename) |
| + if err != nil { |
| + s.Error(filename, err) |
| + } else { |
| + s.SizeFile(filename, stat.Size()) |
| + } |
| +} |
| + |
| +// Walker which reads the whole file |
| +type ReadWalker struct { |
| + NullWalker |
| +} |
| + |
| +func (r *ReadWalker) SmallFile(filename string, alldata []byte) { |
| + r.NullWalker.SmallFile(filename, alldata) |
| +} |
| +func (r *ReadWalker) LargeFile(filename string) { |
| + r.NullWalker.LargeFile(filename) |
| + _, err := ioutil.ReadFile(filename) |
| + if err != nil { |
| + r.Error(filename, err) |
| + } |
| +} |
| + |
| +// Walker which hashes all the files |
| +type HashWalker struct { |
| + NullWalker |
| + obuf io.Writer |
| +} |
| + |
| +func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) { |
| + fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest) |
| +} |
| +func (h *HashWalker) SmallFile(filename string, alldata []byte) { |
| + h.NullWalker.SmallFile(filename, alldata) |
| + h.HashedFile(filename, isolated.HashBytes(alldata)) |
| +} |
| +func (h *HashWalker) LargeFile(filename string) { |
| + h.NullWalker.LargeFile(filename) |
| + d, _ := isolated.HashFile(filename) |
| + h.HashedFile(filename, isolated.HexDigest(d.Digest)) |
| +} |
| + |
| +// Walker which hashes using a worker tool |
| +type ToHash struct { |
| + filename string |
| + hasdata bool |
| + data []byte |
| +} |
| +type ParallelHashWalker struct { |
| + NullWalker |
| + obuf io.Writer |
| + workers int |
| + queue *chan ToHash |
| + finished chan bool |
| +} |
| + |
| +func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, finished chan<- bool) { |
| + fmt.Fprintf(obuf, "Starting hash worker %d\n", name) |
| + |
| + var filecount uint64 = 0 |
| + var bytecount uint64 = 0 |
| + for tohash := range queue { |
| + filecount += 1 |
| + |
| + var digest isolated.HexDigest |
| + if tohash.hasdata { |
| + bytecount += uint64(len(tohash.data)) |
| + digest = isolated.HashBytes(tohash.data) |
| + } else { |
| + d, _ := isolated.HashFile(tohash.filename) |
| + bytecount += uint64(d.Size) |
| + digest = isolated.HexDigest(d.Digest) |
| + } |
| + fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest) |
| + } |
| + fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", name, filecount, humanize.Bytes(bytecount)) |
| + finished <- true |
| +} |
| +func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker { |
| + var max int = *maxworkers |
| + |
| + maxProcs := runtime.GOMAXPROCS(0) |
| + if maxProcs < max { |
| + max = maxProcs |
| + } |
| + |
| + numCPU := runtime.NumCPU() |
| + if numCPU < maxProcs { |
| + max = numCPU |
| + } |
| + |
| + if max < *maxworkers { |
| + // FIXME: Warn |
| + } |
| + |
| + h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bool)} |
| + return &h |
| +} |
| +func (h *ParallelHashWalker) Init() { |
| + if h.queue == nil { |
| + q := make(chan ToHash, h.workers) |
| + h.queue = &q |
| + for i := 0; i < h.workers; i++ { |
| + go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finished) |
| + } |
| + } |
| +} |
| +func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) { |
| + h.NullWalker.SmallFile(filename, alldata) |
| + h.Init() |
| + *h.queue <- ToHash{filename: filename, hasdata: true, data: alldata} |
| +} |
| +func (h *ParallelHashWalker) LargeFile(filename string) { |
| + h.NullWalker.LargeFile(filename) |
| + h.Init() |
| + *h.queue <- ToHash{filename: filename, hasdata: false} |
| +} |
| +func (h *ParallelHashWalker) Finished() { |
| + h.Init() |
| + close(*h.queue) |
| + for i := 0; i < h.workers; i++ { |
| + <-h.finished |
| + } |
| + fmt.Fprintln(h.obuf, "All workers finished.") |
| + h.queue = nil |
| +} |
| + |
| +func main() { |
| + flag.Parse() |
| + |
| + if _, err := os.Stat(*dir); err != nil { |
| + log.Fatalf("Directory not found: %s", err) |
| + } |
| + |
| + var stats *NullWalker |
| + var obs dirwalk.WalkObserver |
| + switch *do { |
| + case "nothing": |
| + o := &NullWalker{} |
| + stats = o |
| + obs = o |
| + case "print": |
| + o := &PrintWalker{obuf: os.Stderr} |
| + stats = &o.NullWalker |
| + obs = o |
| + case "size": |
| + o := &SizeWalker{obuf: os.Stderr} |
| + stats = &o.NullWalker |
| + obs = o |
| + case "read": |
| + o := &ReadWalker{} |
| + stats = &o.NullWalker |
| + obs = o |
| + case "hash": |
| + o := &HashWalker{obuf: os.Stderr} |
| + stats = &o.NullWalker |
| + obs = o |
| + case "phash": |
| + o := CreateParallelHashWalker(os.Stderr) |
| + stats = &o.NullWalker |
| + obs = o |
| + default: |
| + log.Fatalf("Invalid action '%s'", *do) |
| + } |
| + |
| + for i := 0; i < *repeat; i++ { |
| + stats.smallfiles = 0 |
| + stats.largefiles = 0 |
| + |
| + switch *method { |
| + case "simple": |
| + dirwalk.WalkBasic(*dir, *smallfilesize, obs) |
| + case "nostat": |
| + dirwalk.WalkNoStat(*dir, *smallfilesize, obs) |
| + case "parallel": |
| + dirwalk.WalkParallel(*dir, *smallfilesize, obs) |
| + default: |
| + log.Fatalf("Invalid walk method '%s'", *method) |
| + } |
| + fmt.Printf("Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles) |
| + } |
| + fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles) |
| +} |