Chromium Code Reviews

Side by Side Diff: common/dirwalk/tests/tools/walkdir/main.go

Issue 2054763004: luci-go/common/dirwalk: Code for walking a directory tree efficiently
Base URL: https://github.com/luci/luci-go@master
Patch Set: Rebase onto master. Created 4 years, 3 months ago
1 // Copyright 2016 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file.
4
5 package main
6
7 // Walk a given directory and perform an action on the files found.
8
9 import (
10 "flag"
11 "fmt"
12 "io"
13 "io/ioutil"
14 "log"
15 "os"
16 "runtime"
17 "sync/atomic"
18
19 "github.com/dustin/go-humanize"
20 "github.com/luci/luci-go/common/dirwalk"
21 "github.com/luci/luci-go/common/isolated"
22 )
23
24 var method = flag.String("method", "simple", "Method used to walk the tree: simple, nostat or parallel")
25 var dir = flag.String("dir", "", "Directory to walk")
26
27 //var do = flags.Choice("do", "null", ["null", "print", "read"])
28 var do = flag.String("do", "nothing", "Action to perform on the files: nothing, print, size, read, hash or phash")
29 var smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a small file")
30 var repeat = flag.Int("repeat", 1, "Repeat the walk x times")
31
32 var maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use.")
33
34 // NullWalker implements dirwalk.WalkObserver. It counts the number of files of each type.
35 type NullWalker struct {
36 smallfiles uint64
37 largefiles uint64
38 }
39
40 func (n *NullWalker) SmallFile(filename string, alldata []byte) {
41 atomic.AddUint64(&n.smallfiles, 1)
42 }
43 func (n *NullWalker) LargeFile(filename string) {
44 atomic.AddUint64(&n.largefiles, 1)
45 }
46 func (n *NullWalker) Error(pathname string, err error) {
47 log.Fatalf("%s:%s", pathname, err)
48 }
49 func (n *NullWalker) Finished() {
50 }
51
52 // PrintWalker implements dirwalk.WalkObserver. It prints the filename of each found file.
53 type PrintWalker struct {
54 NullWalker
55 obuf io.Writer
56 }
57
58 func (p *PrintWalker) PrintFile(filename string) {
59 fmt.Fprintln(p.obuf, filename)
60 }
61 func (p *PrintWalker) SmallFile(filename string, alldata []byte) {
62 p.NullWalker.SmallFile(filename, alldata)
63 p.PrintFile(filename)
64 }
65 func (p *PrintWalker) LargeFile(filename string) {
66 p.NullWalker.LargeFile(filename)
67 p.PrintFile(filename)
68 }
69
70 // SizeWalker implements dirwalk.WalkObserver. It prints the filename and
71 // file size of each found file.
72 type SizeWalker struct {
73 NullWalker
74 obuf io.Writer
75 }
76
77 func (s *SizeWalker) SizeFile(filename string, size int64) {
78 fmt.Fprintf(s.obuf, "%s: %d\n", filename, size)
79 }
80 func (s *SizeWalker) SmallFile(filename string, alldata []byte) {
81 s.NullWalker.SmallFile(filename, alldata)
82 s.SizeFile(filename, int64(len(alldata)))
83 }
84 func (s *SizeWalker) LargeFile(filename string) {
85 s.NullWalker.LargeFile(filename)
86 stat, err := os.Stat(filename)
87 if err != nil {
88 s.Error(filename, err)
89 } else {
90 s.SizeFile(filename, stat.Size())
91 }
92 }
93
94 // ReadWalker implements dirwalk.WalkObserver. It reads the contents of each found file.
95 type ReadWalker struct {
96 NullWalker
97 }
98
99 func (r *ReadWalker) SmallFile(filename string, alldata []byte) {
100 r.NullWalker.SmallFile(filename, alldata)
101 }
102 func (r *ReadWalker) LargeFile(filename string) {
103 r.NullWalker.LargeFile(filename)
104 _, err := ioutil.ReadFile(filename)
105 if err != nil {
106 r.Error(filename, err)
107 }
108 }
109
110 // HashWalker implements dirwalk.WalkObserver. It generates a hash for the
111 // contents of each found file.
112 type HashWalker struct {
113 NullWalker
114 obuf io.Writer
115 }
116
117 func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) {
118 fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest)
119 }
120 func (h *HashWalker) SmallFile(filename string, alldata []byte) {
121 h.NullWalker.SmallFile(filename, alldata)
122 h.HashedFile(filename, isolated.HashBytes(alldata))
123 }
124 func (h *HashWalker) LargeFile(filename string) {
125 h.NullWalker.LargeFile(filename)
126 d, _ := isolated.HashFile(filename)
127 h.HashedFile(filename, isolated.HexDigest(d.Digest))
128 }
129
130 // ToHash is a work item describing a file to be hashed by a parallel worker.
131 type ToHash struct {
132 filename string
133 hasdata bool
134 data []byte
135 }
136
137 // ParallelHashWalker implements dirwalk.WalkObserver. It generates a hash
138 // for the contents of each found file using multiple worker goroutines.
139 type ParallelHashWalker struct {
140 NullWalker
141 obuf io.Writer
142 workers int
143 queue *chan ToHash
144 finished chan bool
145 }
146
147 func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, finished chan<- bool) {
148 fmt.Fprintf(obuf, "Starting hash worker %d\n", name)
149
150 var filecount uint64 = 0
151 var bytecount uint64 = 0
152 for tohash := range queue {
153 filecount += 1
154
155 var digest isolated.HexDigest
156 if tohash.hasdata {
157 bytecount += uint64(len(tohash.data))
158 digest = isolated.HashBytes(tohash.data)
159 } else {
160 d, _ := isolated.HashFile(tohash.filename)
161 bytecount += uint64(d.Size)
162 digest = isolated.HexDigest(d.Digest)
163 }
164 fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest)
165 }
166 fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", name, filecount, humanize.Bytes(bytecount))
167 finished <- true
168 }
169 func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker {
170 var max int = *maxworkers
171
172 maxProcs := runtime.GOMAXPROCS(0)
173 if maxProcs < max {
174 max = maxProcs
175 }
176
177 numCPU := runtime.NumCPU()
178 if numCPU < maxProcs {
179 max = numCPU
180 }
181
182 if max < *maxworkers {
183 // FIXME: Warn
184 }
185
186 h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bool)}
187 return &h
188 }
189 func (h *ParallelHashWalker) Init() {
190 if h.queue == nil {
191 q := make(chan ToHash, h.workers)
192 h.queue = &q
193 for i := 0; i < h.workers; i++ {
194 go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finished)
195 }
196 }
197 }
198 func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) {
199 h.NullWalker.SmallFile(filename, alldata)
200 h.Init()
201 *h.queue <- ToHash{filename: filename, hasdata: true, data: alldata}
202 }
203 func (h *ParallelHashWalker) LargeFile(filename string) {
204 h.NullWalker.LargeFile(filename)
205 h.Init()
206 *h.queue <- ToHash{filename: filename, hasdata: false}
207 }
208 func (h *ParallelHashWalker) Finished() {
209 h.Init()
210 close(*h.queue)
211 for i := 0; i < h.workers; i++ {
212 <-h.finished
213 }
214 fmt.Fprintln(h.obuf, "All workers finished.")
215 h.queue = nil
216 }
217
218 func main() {
219 flag.Parse()
220
221 if _, err := os.Stat(*dir); err != nil {
222 log.Fatalf("Directory not found: %s", err)
223 }
224
225 var stats *NullWalker
226 var obs dirwalk.WalkObserver
227 switch *do {
228 case "nothing":
229 o := &NullWalker{}
230 stats = o
231 obs = o
232 case "print":
233 o := &PrintWalker{obuf: os.Stderr}
234 stats = &o.NullWalker
235 obs = o
236 case "size":
237 o := &SizeWalker{obuf: os.Stderr}
238 stats = &o.NullWalker
239 obs = o
240 case "read":
241 o := &ReadWalker{}
242 stats = &o.NullWalker
243 obs = o
244 case "hash":
245 o := &HashWalker{obuf: os.Stderr}
246 stats = &o.NullWalker
247 obs = o
248 case "phash":
249 o := CreateParallelHashWalker(os.Stderr)
250 stats = &o.NullWalker
251 obs = o
252 default:
253 log.Fatalf("Invalid action '%s'", *do)
254 }
255
256 for i := 0; i < *repeat; i++ {
257 stats.smallfiles = 0
258 stats.largefiles = 0
259
260 switch *method {
261 case "simple":
262 dirwalk.WalkBasic(*dir, *smallfilesize, obs)
263 case "nostat":
264 dirwalk.WalkNoStat(*dir, *smallfilesize, obs)
265 case "parallel":
266 dirwalk.WalkParallel(*dir, *smallfilesize, obs)
267 default:
268 log.Fatalf("Invalid walk method '%s'", *method)
269 }
270 fmt.Printf("Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles)
271 }
272 fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles)
273 }
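
For readers trying the benchmark locally, a typical invocation (assuming the binary is built as walkdir from this directory) would be walkdir -dir /path/to/tree -method parallel -do phash, with -repeat to average over several runs. The sketch below shows the same observer pattern used directly against the dirwalk package: a minimal counting observer, modeled on NullWalker above, passed to dirwalk.WalkBasic. The method set (SmallFile, LargeFile, Error, Finished), the WalkBasic(dir, smallfilesize, observer) signature, and the countingObserver type are inferred from this file for illustration; treat them as assumptions about the dirwalk API rather than its documented contract.

package main

// Minimal sketch: a counting dirwalk.WalkObserver handed to WalkBasic,
// mirroring how main() above wires NullWalker into the walk. Names and
// signatures are inferred from the reviewed file, not the package docs.

import (
	"fmt"
	"log"
	"sync/atomic"

	"github.com/luci/luci-go/common/dirwalk"
)

type countingObserver struct {
	smallfiles uint64
	largefiles uint64
}

// SmallFile receives the file contents already read into memory.
func (c *countingObserver) SmallFile(filename string, alldata []byte) {
	atomic.AddUint64(&c.smallfiles, 1)
}

// LargeFile receives only the filename; the observer reads it if needed.
func (c *countingObserver) LargeFile(filename string) {
	atomic.AddUint64(&c.largefiles, 1)
}

func (c *countingObserver) Error(pathname string, err error) {
	log.Printf("%s: %s", pathname, err)
}

func (c *countingObserver) Finished() {}

func main() {
	obs := &countingObserver{}
	// 64*1024 mirrors the -smallfilesize default used by the tool above.
	dirwalk.WalkBasic(".", 64*1024, obs)
	fmt.Printf("Found %d small files and %d large files\n", obs.smallfiles, obs.largefiles)
}

The embedding pattern in the reviewed file (PrintWalker, SizeWalker, ReadWalker, HashWalker all embedding NullWalker) lets each variant reuse the shared counters while overriding only the callbacks it cares about.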