Chromium Code Reviews

Side by Side Diff: common/dirwalk/tests/tools/walkdir/main.go

Issue 2054763004: luci-go/common/dirwalk: Code for walking a directory tree efficiently
Base URL: https://github.com/luci/luci-go@master
Patch Set: Small updates. Created 4 years, 3 months ago
1 // Copyright 2016 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file.
4
5 package main
6
7 // Quick tool for generating directories to walk.
M-A Ruel 2016/09/15 14:31:03 same I know it's quick, and it's a tool (it's an
mithro 2016/09/20 12:41:45 Done. This comment was totally wrong anyway.
8
9 import (
10 "flag"
11 "fmt"
12 "io"
13 "io/ioutil"
14 "log"
15 "os"
16 "runtime"
17 "sync/atomic"
18
19 "github.com/dustin/go-humanize"
20 "github.com/luci/luci-go/common/dirwalk"
21 "github.com/luci/luci-go/common/isolated"
22 )
23
24 var method = flag.String("method", "simple", "Method used to walk the tree")
M-A Ruel 2016/09/15 14:31:03 Having these inside main(), as I noted in the othe
mithro 2016/09/20 12:41:45 See above.
25 var dir = flag.String("dir", "", "Directory to walk")
26
27 //var do = flags.Choice("do", "null", ["null", "print", "read"])
28 var do = flag.String("do", "nothing", "Action to perform on the files")
29 var smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a small file")
30 var repeat = flag.Int("repeat", 1, "Repeat the walk x times")
31
32 var maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use.")
33
34 // Walker which does nothing but count the files of each type
M-A Ruel 2016/09/15 14:31:03 // NullWalker implements Walker. It only count the
mithro 2016/09/20 12:41:45 Done.
35 type NullWalker struct {
36 smallfiles uint64
M-A Ruel 2016/09/15 14:31:03 does this need to be uint64 or int would do? I thi
mithro 2016/09/20 12:41:45 Well, it can't be negative?
37 largefiles uint64
38 }
39
40 func (n *NullWalker) SmallFile(filename string, alldata []byte) {
41 atomic.AddUint64(&n.smallfiles, 1)
42 }
43 func (n *NullWalker) LargeFile(filename string) {
44 atomic.AddUint64(&n.largefiles, 1)
45 }
46 func (n *NullWalker) Error(pathname string, err error) {
47 log.Fatalf("%s:%s", pathname, err)
48 }
49 func (n *NullWalker) Finished() {
50 }
51
52 // Walker which just prints the filenames of everything
M-A Ruel 2016/09/15 14:31:03 Same for the rest
mithro 2016/09/20 12:41:45 Done.
53 type PrintWalker struct {
54 NullWalker
55 obuf io.Writer
56 }
57
58 func (p *PrintWalker) PrintFile(filename string) {
59 fmt.Fprintln(p.obuf, filename)
60 }
61 func (p *PrintWalker) SmallFile(filename string, alldata []byte) {
62 p.NullWalker.SmallFile(filename, alldata)
63 p.PrintFile(filename)
64 }
65 func (p *PrintWalker) LargeFile(filename string) {
66 p.NullWalker.LargeFile(filename)
67 p.PrintFile(filename)
68 }
69
70 // Walker which prints the size of everything
71 type SizeWalker struct {
72 NullWalker
73 obuf io.Writer
74 }
75
76 func (s *SizeWalker) SizeFile(filename string, size int64) {
77 fmt.Fprintf(s.obuf, "%s: %d\n", filename, size)
78 }
79 func (s *SizeWalker) SmallFile(filename string, alldata []byte) {
80 s.NullWalker.SmallFile(filename, alldata)
81 s.SizeFile(filename, int64(len(alldata)))
82 }
83 func (s *SizeWalker) LargeFile(filename string) {
84 s.NullWalker.LargeFile(filename)
85 stat, err := os.Stat(filename)
86 if err != nil {
87 s.Error(filename, err)
88 } else {
89 s.SizeFile(filename, stat.Size())
90 }
91 }
92
93 // Walker which reads the whole file
94 type ReadWalker struct {
95 NullWalker
96 }
97
98 func (r *ReadWalker) SmallFile(filename string, alldata []byte) {
99 r.NullWalker.SmallFile(filename, alldata)
100 }
101 func (r *ReadWalker) LargeFile(filename string) {
102 r.NullWalker.LargeFile(filename)
103 _, err := ioutil.ReadFile(filename)
104 if err != nil {
105 r.Error(filename, err)
106 }
107 }
108
109 // Walker which hashes all the files
110 type HashWalker struct {
111 NullWalker
112 obuf io.Writer
113 }
114
115 func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) {
116 fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest)
117 }
118 func (h *HashWalker) SmallFile(filename string, alldata []byte) {
119 h.NullWalker.SmallFile(filename, alldata)
120 h.HashedFile(filename, isolated.HashBytes(alldata))
121 }
122 func (h *HashWalker) LargeFile(filename string) {
123 h.NullWalker.LargeFile(filename)
124 d, _ := isolated.HashFile(filename)
125 h.HashedFile(filename, isolated.HexDigest(d.Digest))
126 }
127
128 // Walker which hashes using a worker tool
129 type ToHash struct {
130 filename string
131 hasdata bool
132 data []byte
133 }
134 type ParallelHashWalker struct {
135 NullWalker
136 obuf io.Writer
137 workers int
138 queue *chan ToHash
139 finished chan bool
140 }
141
142 func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, finished chan<- bool) {
143 fmt.Fprintf(obuf, "Starting hash worker %d\n", name)
144
145 var filecount uint64 = 0
146 var bytecount uint64 = 0
147 for tohash := range queue {
148 filecount += 1
149
150 var digest isolated.HexDigest
151 if tohash.hasdata {
152 bytecount += uint64(len(tohash.data))
153 digest = isolated.HashBytes(tohash.data)
154 } else {
155 d, _ := isolated.HashFile(tohash.filename)
156 bytecount += uint64(d.Size)
157 digest = isolated.HexDigest(d.Digest)
158 }
159 fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest)
160 }
161 fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", name, filecount, humanize.Bytes(bytecount))
162 finished <- true
163 }
164 func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker {
165 var max int = *maxworkers
166
167 maxProcs := runtime.GOMAXPROCS(0)
168 if maxProcs < max {
169 max = maxProcs
170 }
171
172 numCPU := runtime.NumCPU()
173 if numCPU < max {
174 max = numCPU
175 }
176
177 if max < *maxworkers {
178 // FIXME: Warn
179 }
180
181 h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bool)}
182 return &h
183 }
184 func (h *ParallelHashWalker) Init() {
185 if h.queue == nil {
186 q := make(chan ToHash, h.workers)
187 h.queue = &q
188 for i := 0; i < h.workers; i++ {
189 go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finished)
190 }
191 }
192 }
193 func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) {
194 h.NullWalker.SmallFile(filename, alldata)
195 h.Init()
196 *h.queue <- ToHash{filename: filename, hasdata: true, data: alldata}
197 }
198 func (h *ParallelHashWalker) LargeFile(filename string) {
199 h.NullWalker.LargeFile(filename)
200 h.Init()
201 *h.queue <- ToHash{filename: filename, hasdata: false}
202 }
203 func (h *ParallelHashWalker) Finished() {
204 h.Init()
205 close(*h.queue)
206 for i := 0; i < h.workers; i++ {
207 <-h.finished
208 }
209 fmt.Fprintln(h.obuf, "All workers finished.")
210 h.queue = nil
211 }
212
213 func main() {
214 flag.Parse()
215
216 if _, err := os.Stat(*dir); err != nil {
217 log.Fatalf("Directory not found: %s", err)
218 }
219
220 var stats *NullWalker
221 var obs dirwalk.WalkObserver
222 switch *do {
223 case "nothing":
224 o := &NullWalker{}
225 stats = o
226 obs = o
227 case "print":
228 o := &PrintWalker{obuf: os.Stderr}
229 stats = &o.NullWalker
230 obs = o
231 case "size":
232 o := &SizeWalker{obuf: os.Stderr}
233 stats = &o.NullWalker
234 obs = o
235 case "read":
236 o := &ReadWalker{}
237 stats = &o.NullWalker
238 obs = o
239 case "hash":
240 o := &HashWalker{obuf: os.Stderr}
241 stats = &o.NullWalker
242 obs = o
243 case "phash":
244 o := CreateParallelHashWalker(os.Stderr)
245 stats = &o.NullWalker
246 obs = o
247 default:
248 log.Fatalf("Invalid action '%s'", *do)
249 }
250
251 for i := 0; i < *repeat; i++ {
252 stats.smallfiles = 0
253 stats.largefiles = 0
254
255 switch *method {
256 case "simple":
257 dirwalk.WalkBasic(*dir, *smallfilesize, obs)
258 case "nostat":
259 dirwalk.WalkNoStat(*dir, *smallfilesize, obs)
260 case "parallel":
261 dirwalk.WalkParallel(*dir, *smallfilesize, obs)
262 default:
263 log.Fatalf("Invalid walk method '%s'", *method)
264 }
265 fmt.Printf("Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles)
266 }
267 fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles)
268 }
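
Note for readers unfamiliar with the dirwalk package: below is a minimal, self-contained sketch of how an observer like NullWalker plugs into a walk. The observer method set (SmallFile/LargeFile/Error/Finished) and the WalkBasic(dir, smallfilesize, obs) signature are inferred from this file only and may not match the package exactly; treat it as illustrative rather than as the package's documented API.

package main

import (
	"log"
	"sync/atomic"

	"github.com/luci/luci-go/common/dirwalk"
)

// countObserver mirrors NullWalker above: it only counts files of each type.
type countObserver struct {
	small, large uint64
}

// SmallFile receives the full contents of files at or below the small-file threshold.
func (c *countObserver) SmallFile(filename string, alldata []byte) { atomic.AddUint64(&c.small, 1) }

// LargeFile receives only the filename for files above the threshold.
func (c *countObserver) LargeFile(filename string) { atomic.AddUint64(&c.large, 1) }

func (c *countObserver) Error(pathname string, err error) { log.Printf("%s: %s", pathname, err) }

func (c *countObserver) Finished() {}

func main() {
	obs := &countObserver{}
	// 64*1024 matches the default of the -smallfilesize flag defined above.
	dirwalk.WalkBasic(".", 64*1024, obs)
	log.Printf("small=%d large=%d", obs.small, obs.large)
}

The tool itself would be driven through the flags defined at the top of the file, e.g. something like: walkdir -dir /some/path -method simple -do nothing -repeat 3.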
