OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The LUCI Authors. All rights reserved. | |
2 // Use of this source code is governed under the Apache License, Version 2.0 | |
3 // that can be found in the LICENSE file. | |
4 | |
5 package main | |
6 | |
// Command-line tool for benchmarking different methods of walking a
// directory tree and different per-file actions performed during the walk.
8 | |
9 import ( | |
10 "flag" | |
11 "fmt" | |
12 "io" | |
13 "io/ioutil" | |
14 "log" | |
15 "os" | |
16 "runtime" | |
17 "sync/atomic" | |
18 | |
19 "github.com/dustin/go-humanize" | |
20 "github.com/luci/luci-go/common/dirwalk" | |
21 "github.com/luci/luci-go/common/isolated" | |
22 ) | |
23 | |
// Command-line flags selecting the walk method, the per-file action and
// the tuning knobs for the benchmark run.
var (
	method = flag.String("method", "simple", "Method used to walk the tree")
	dir    = flag.String("dir", "", "Directory to walk")

	// TODO: a Choice-style flag would restrict -do to the valid values.
	do            = flag.String("do", "nothing", "Action to perform on the files")
	smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a small file")
	repeat        = flag.Int("repeat", 1, "Repeat the walk x times")

	maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use.")
)
// NullWalker implements the walk-observer interface and does nothing except
// count how many small and large files were encountered. The counters are
// updated atomically so concurrent walk methods can share one instance.
type NullWalker struct {
	smallfiles uint64
	largefiles uint64
}

// SmallFile counts a file whose full contents were delivered inline.
func (n *NullWalker) SmallFile(filename string, alldata []byte) {
	atomic.AddUint64(&n.smallfiles, 1)
}

// LargeFile counts a file that was too big to be delivered inline.
func (n *NullWalker) LargeFile(filename string) {
	atomic.AddUint64(&n.largefiles, 1)
}

// Error aborts the whole program on any walk error.
func (n *NullWalker) Error(pathname string, err error) {
	log.Fatalf("%s:%s", pathname, err)
}

// Finished is called once the walk completes; nothing to do here.
func (n *NullWalker) Finished() {}
51 | |
52 // Walker which just prints the filenames of everything | |
M-A Ruel
2016/09/15 14:31:03
Same for the rest
mithro
2016/09/20 12:41:45
Done.
| |
53 type PrintWalker struct { | |
54 NullWalker | |
55 obuf io.Writer | |
56 } | |
57 | |
58 func (p *PrintWalker) PrintFile(filename string) { | |
59 fmt.Fprintln(p.obuf, filename) | |
60 } | |
61 func (p *PrintWalker) SmallFile(filename string, alldata []byte) { | |
62 p.NullWalker.SmallFile(filename, alldata) | |
63 p.PrintFile(filename) | |
64 } | |
65 func (p *PrintWalker) LargeFile(filename string) { | |
66 p.NullWalker.LargeFile(filename) | |
67 p.PrintFile(filename) | |
68 } | |
69 | |
70 // Walker which prints the size of everything | |
71 type SizeWalker struct { | |
72 NullWalker | |
73 obuf io.Writer | |
74 } | |
75 | |
76 func (s *SizeWalker) SizeFile(filename string, size int64) { | |
77 fmt.Fprintf(s.obuf, "%s: %d\n", filename, size) | |
78 } | |
79 func (s *SizeWalker) SmallFile(filename string, alldata []byte) { | |
80 s.NullWalker.SmallFile(filename, alldata) | |
81 s.SizeFile(filename, int64(len(alldata))) | |
82 } | |
83 func (s *SizeWalker) LargeFile(filename string) { | |
84 s.NullWalker.LargeFile(filename) | |
85 stat, err := os.Stat(filename) | |
86 if err != nil { | |
87 s.Error(filename, err) | |
88 } else { | |
89 s.SizeFile(filename, stat.Size()) | |
90 } | |
91 } | |
92 | |
93 // Walker which reads the whole file | |
94 type ReadWalker struct { | |
95 NullWalker | |
96 } | |
97 | |
98 func (r *ReadWalker) SmallFile(filename string, alldata []byte) { | |
99 r.NullWalker.SmallFile(filename, alldata) | |
100 } | |
101 func (r *ReadWalker) LargeFile(filename string) { | |
102 r.NullWalker.LargeFile(filename) | |
103 _, err := ioutil.ReadFile(filename) | |
104 if err != nil { | |
105 r.Error(filename, err) | |
106 } | |
107 } | |
108 | |
109 // Walker which hashes all the files | |
110 type HashWalker struct { | |
111 NullWalker | |
112 obuf io.Writer | |
113 } | |
114 | |
115 func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) { | |
116 fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest) | |
117 } | |
118 func (h *HashWalker) SmallFile(filename string, alldata []byte) { | |
119 h.NullWalker.SmallFile(filename, alldata) | |
120 h.HashedFile(filename, isolated.HashBytes(alldata)) | |
121 } | |
122 func (h *HashWalker) LargeFile(filename string) { | |
123 h.NullWalker.LargeFile(filename) | |
124 d, _ := isolated.HashFile(filename) | |
125 h.HashedFile(filename, isolated.HexDigest(d.Digest)) | |
126 } | |
127 | |
// ToHash is one unit of work for a hash worker: either the file's full
// contents (hasdata == true, data valid) or just a filename to be read
// and hashed from disk.
type ToHash struct {
	filename string
	hasdata  bool
	data     []byte
}
134 type ParallelHashWalker struct { | |
135 NullWalker | |
136 obuf io.Writer | |
137 workers int | |
138 queue *chan ToHash | |
139 finished chan bool | |
140 } | |
141 | |
142 func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, fin ished chan<- bool) { | |
143 fmt.Fprintf(obuf, "Starting hash worker %d\n", name) | |
144 | |
145 var filecount uint64 = 0 | |
146 var bytecount uint64 = 0 | |
147 for tohash := range queue { | |
148 filecount += 1 | |
149 | |
150 var digest isolated.HexDigest | |
151 if tohash.hasdata { | |
152 bytecount += uint64(len(tohash.data)) | |
153 digest = isolated.HashBytes(tohash.data) | |
154 } else { | |
155 d, _ := isolated.HashFile(tohash.filename) | |
156 bytecount += uint64(d.Size) | |
157 digest = isolated.HexDigest(d.Digest) | |
158 } | |
159 fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest) | |
160 } | |
161 fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", nam e, filecount, humanize.Bytes(bytecount)) | |
162 finished <- true | |
163 } | |
164 func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker { | |
165 var max int = *maxworkers | |
166 | |
167 maxProcs := runtime.GOMAXPROCS(0) | |
168 if maxProcs < max { | |
169 max = maxProcs | |
170 } | |
171 | |
172 numCPU := runtime.NumCPU() | |
173 if numCPU < maxProcs { | |
174 max = numCPU | |
175 } | |
176 | |
177 if max < *maxworkers { | |
178 // FIXME: Warn | |
179 } | |
180 | |
181 h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bo ol)} | |
182 return &h | |
183 } | |
184 func (h *ParallelHashWalker) Init() { | |
185 if h.queue == nil { | |
186 q := make(chan ToHash, h.workers) | |
187 h.queue = &q | |
188 for i := 0; i < h.workers; i++ { | |
189 go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finis hed) | |
190 } | |
191 } | |
192 } | |
193 func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) { | |
194 h.NullWalker.SmallFile(filename, alldata) | |
195 h.Init() | |
196 *h.queue <- ToHash{filename: filename, hasdata: true, data: alldata} | |
197 } | |
198 func (h *ParallelHashWalker) LargeFile(filename string) { | |
199 h.NullWalker.LargeFile(filename) | |
200 h.Init() | |
201 *h.queue <- ToHash{filename: filename, hasdata: false} | |
202 } | |
203 func (h *ParallelHashWalker) Finished() { | |
204 h.Init() | |
205 close(*h.queue) | |
206 for i := 0; i < h.workers; i++ { | |
207 <-h.finished | |
208 } | |
209 fmt.Fprintln(h.obuf, "All workers finished.") | |
210 h.queue = nil | |
211 } | |
212 | |
213 func main() { | |
214 flag.Parse() | |
215 | |
216 if _, err := os.Stat(*dir); err != nil { | |
217 log.Fatalf("Directory not found: %s", err) | |
218 } | |
219 | |
220 var stats *NullWalker | |
221 var obs dirwalk.WalkObserver | |
222 switch *do { | |
223 case "nothing": | |
224 o := &NullWalker{} | |
225 stats = o | |
226 obs = o | |
227 case "print": | |
228 o := &PrintWalker{obuf: os.Stderr} | |
229 stats = &o.NullWalker | |
230 obs = o | |
231 case "size": | |
232 o := &SizeWalker{obuf: os.Stderr} | |
233 stats = &o.NullWalker | |
234 obs = o | |
235 case "read": | |
236 o := &ReadWalker{} | |
237 stats = &o.NullWalker | |
238 obs = o | |
239 case "hash": | |
240 o := &HashWalker{obuf: os.Stderr} | |
241 stats = &o.NullWalker | |
242 obs = o | |
243 case "phash": | |
244 o := CreateParallelHashWalker(os.Stderr) | |
245 stats = &o.NullWalker | |
246 obs = o | |
247 default: | |
248 log.Fatalf("Invalid action '%s'", *do) | |
249 } | |
250 | |
251 for i := 0; i < *repeat; i++ { | |
252 stats.smallfiles = 0 | |
253 stats.largefiles = 0 | |
254 | |
255 switch *method { | |
256 case "simple": | |
257 dirwalk.WalkBasic(*dir, *smallfilesize, obs) | |
258 case "nostat": | |
259 dirwalk.WalkNoStat(*dir, *smallfilesize, obs) | |
260 case "parallel": | |
261 dirwalk.WalkParallel(*dir, *smallfilesize, obs) | |
262 default: | |
263 log.Fatalf("Invalid walk method '%s'", *method) | |
264 } | |
265 fmt.Printf("Found %d small files and %d large files\n", stats.sm allfiles, stats.largefiles) | |
266 } | |
267 fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stat s.smallfiles, stats.largefiles) | |
268 } | |
OLD | NEW |