Chromium Code Reviews

Side by Side Diff: common/dirwalk/tests/tools/walkdir/main.go

Issue 2054763004: luci-go/common/dirwalk: Code for walking a directory tree efficiently
Base URL: https://github.com/luci/luci-go@master
Patch Set: Small updates. Created 4 years, 3 months ago
1 // Copyright 2016 The LUCI Authors. All rights reserved.
2 // Use of this source code is governed under the Apache License, Version 2.0
3 // that can be found in the LICENSE file.
4
5 package main
6
7 // Quick tool for generating directories to walk.
M-A Ruel 2016/09/15 14:31:03 same I know it's quick, and it's a tool (it's an
mithro 2016/09/20 12:41:45 Done. This comment was totally wrong anyway.
8
9 import (
10 "flag"
11 "fmt"
12 "io"
13 "io/ioutil"
14 "log"
15 "os"
16 "runtime"
17 "sync/atomic"
18
19 "github.com/dustin/go-humanize"
20 "github.com/luci/luci-go/common/dirwalk"
21 "github.com/luci/luci-go/common/isolated"
22 )
23
24 var method = flag.String("method", "simple", "Method used to walk the tree")
M-A Ruel 2016/09/15 14:31:03 Having these inside main(), as I noted in the othe
mithro 2016/09/20 12:41:45 See above.
25 var dir = flag.String("dir", "", "Directory to walk")
26
27 //var do = flags.Choice("do", "null", ["null", "print", "read"])
28 var do = flag.String("do", "nothing", "Action to perform on the files")
29 var smallfilesize = flag.Int64("smallfilesize", 64*1024, "Size to consider a small file")
30 var repeat = flag.Int("repeat", 1, "Repeat the walk x times")
31
32 var maxworkers = flag.Int("maxworkers", 100, "Maximum number of workers to use.")
33
34 // Walker which does nothing but count the files of each type
M-A Ruel 2016/09/15 14:31:03 // NullWalker implements Walker. It only count the
mithro 2016/09/20 12:41:45 Done.
35 type NullWalker struct {
36 smallfiles uint64
M-A Ruel 2016/09/15 14:31:03 does this need to be uint64 or int would do? I thi
mithro 2016/09/20 12:41:45 Well, it can't be negative?
37 largefiles uint64
38 }
39
40 func (n *NullWalker) SmallFile(filename string, alldata []byte) {
41 atomic.AddUint64(&n.smallfiles, 1)
42 }
43 func (n *NullWalker) LargeFile(filename string) {
44 atomic.AddUint64(&n.largefiles, 1)
45 }
46 func (n *NullWalker) Error(pathname string, err error) {
47 log.Fatalf("%s:%s", pathname, err)
48 }
49 func (n *NullWalker) Finished() {
50 }
51
52 // Walker which just prints the filenames of everything
M-A Ruel 2016/09/15 14:31:03 Same for the rest
mithro 2016/09/20 12:41:45 Done.
53 type PrintWalker struct {
54 NullWalker
55 obuf io.Writer
56 }
57
58 func (p *PrintWalker) PrintFile(filename string) {
59 fmt.Fprintln(p.obuf, filename)
60 }
61 func (p *PrintWalker) SmallFile(filename string, alldata []byte) {
62 p.NullWalker.SmallFile(filename, alldata)
63 p.PrintFile(filename)
64 }
65 func (p *PrintWalker) LargeFile(filename string) {
66 p.NullWalker.LargeFile(filename)
67 p.PrintFile(filename)
68 }
69
70 // Walker which prints the size of everything
71 type SizeWalker struct {
72 NullWalker
73 obuf io.Writer
74 }
75
76 func (s *SizeWalker) SizeFile(filename string, size int64) {
77 fmt.Fprintf(s.obuf, "%s: %d\n", filename, size)
78 }
79 func (s *SizeWalker) SmallFile(filename string, alldata []byte) {
80 s.NullWalker.SmallFile(filename, alldata)
81 s.SizeFile(filename, int64(len(alldata)))
82 }
83 func (s *SizeWalker) LargeFile(filename string) {
84 s.NullWalker.LargeFile(filename)
85 stat, err := os.Stat(filename)
86 if err != nil {
87 s.Error(filename, err)
88 } else {
89 s.SizeFile(filename, stat.Size())
90 }
91 }
92
93 // Walker which reads the whole file
94 type ReadWalker struct {
95 NullWalker
96 }
97
98 func (r *ReadWalker) SmallFile(filename string, alldata []byte) {
99 r.NullWalker.SmallFile(filename, alldata)
100 }
101 func (r *ReadWalker) LargeFile(filename string) {
102 r.NullWalker.LargeFile(filename)
103 _, err := ioutil.ReadFile(filename)
104 if err != nil {
105 r.Error(filename, err)
106 }
107 }
108
109 // Walker which hashes all the files
110 type HashWalker struct {
111 NullWalker
112 obuf io.Writer
113 }
114
115 func (h *HashWalker) HashedFile(filename string, digest isolated.HexDigest) {
116 fmt.Fprintf(h.obuf, "%s: %v\n", filename, digest)
117 }
118 func (h *HashWalker) SmallFile(filename string, alldata []byte) {
119 h.NullWalker.SmallFile(filename, alldata)
120 h.HashedFile(filename, isolated.HashBytes(alldata))
121 }
122 func (h *HashWalker) LargeFile(filename string) {
123 h.NullWalker.LargeFile(filename)
124 d, _ := isolated.HashFile(filename)
125 h.HashedFile(filename, isolated.HexDigest(d.Digest))
126 }
127
128 // Walker which hashes using a worker tool
129 type ToHash struct {
130 filename string
131 hasdata bool
132 data []byte
133 }
134 type ParallelHashWalker struct {
135 NullWalker
136 obuf io.Writer
137 workers int
138 queue *chan ToHash
139 finished chan bool
140 }
141
142 func ParallelHashWalkerWorker(name int, obuf io.Writer, queue <-chan ToHash, finished chan<- bool) {
143 fmt.Fprintf(obuf, "Starting hash worker %d\n", name)
144
145 var filecount uint64 = 0
146 var bytecount uint64 = 0
147 for tohash := range queue {
148 filecount += 1
149
150 var digest isolated.HexDigest
151 if tohash.hasdata {
152 bytecount += uint64(len(tohash.data))
153 digest = isolated.HashBytes(tohash.data)
154 } else {
155 d, _ := isolated.HashFile(tohash.filename)
156 bytecount += uint64(d.Size)
157 digest = isolated.HexDigest(d.Digest)
158 }
159 fmt.Fprintf(obuf, "%s: %v\n", tohash.filename, digest)
160 }
161 fmt.Fprintf(obuf, "Finished hash worker %d (hashed %d files, %s)\n", name, filecount, humanize.Bytes(bytecount))
162 finished <- true
163 }
164 func CreateParallelHashWalker(obuf io.Writer) *ParallelHashWalker {
165 var max int = *maxworkers
166
167 maxProcs := runtime.GOMAXPROCS(0)
168 if maxProcs < max {
169 max = maxProcs
170 }
171
172 numCPU := runtime.NumCPU()
173 if numCPU < max {
174 max = numCPU
175 }
176
177 if max < *maxworkers {
178 // FIXME: Warn
179 }
180
181 h := ParallelHashWalker{obuf: obuf, workers: max, finished: make(chan bool)}
182 return &h
183 }
184 func (h *ParallelHashWalker) Init() {
185 if h.queue == nil {
186 q := make(chan ToHash, h.workers)
187 h.queue = &q
188 for i := 0; i < h.workers; i++ {
189 go ParallelHashWalkerWorker(i, h.obuf, *h.queue, h.finished)
190 }
191 }
192 }
193 func (h *ParallelHashWalker) SmallFile(filename string, alldata []byte) {
194 h.NullWalker.SmallFile(filename, alldata)
195 h.Init()
196 *h.queue <- ToHash{filename: filename, hasdata: true, data: alldata}
197 }
198 func (h *ParallelHashWalker) LargeFile(filename string) {
199 h.NullWalker.LargeFile(filename)
200 h.Init()
201 *h.queue <- ToHash{filename: filename, hasdata: false}
202 }
203 func (h *ParallelHashWalker) Finished() {
204 h.Init()
205 close(*h.queue)
206 for i := 0; i < h.workers; i++ {
207 <-h.finished
208 }
209 fmt.Fprintln(h.obuf, "All workers finished.")
210 h.queue = nil
211 }
212
213 func main() {
214 flag.Parse()
215
216 if _, err := os.Stat(*dir); err != nil {
217 log.Fatalf("Directory not found: %s", err)
218 }
219
220 var stats *NullWalker
221 var obs dirwalk.WalkObserver
222 switch *do {
223 case "nothing":
224 o := &NullWalker{}
225 stats = o
226 obs = o
227 case "print":
228 o := &PrintWalker{obuf: os.Stderr}
229 stats = &o.NullWalker
230 obs = o
231 case "size":
232 o := &SizeWalker{obuf: os.Stderr}
233 stats = &o.NullWalker
234 obs = o
235 case "read":
236 o := &ReadWalker{}
237 stats = &o.NullWalker
238 obs = o
239 case "hash":
240 o := &HashWalker{obuf: os.Stderr}
241 stats = &o.NullWalker
242 obs = o
243 case "phash":
244 o := CreateParallelHashWalker(os.Stderr)
245 stats = &o.NullWalker
246 obs = o
247 default:
248 log.Fatalf("Invalid action '%s'", *do)
249 }
250
251 for i := 0; i < *repeat; i++ {
252 stats.smallfiles = 0
253 stats.largefiles = 0
254
255 switch *method {
256 case "simple":
257 dirwalk.WalkBasic(*dir, *smallfilesize, obs)
258 case "nostat":
259 dirwalk.WalkNoStat(*dir, *smallfilesize, obs)
260 case "parallel":
261 dirwalk.WalkParallel(*dir, *smallfilesize, obs)
262 default:
263 log.Fatalf("Invalid walk method '%s'", *method)
264 }
265 fmt.Printf("Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles)
266 }
267 fmt.Fprintf(os.Stderr, "Found %d small files and %d large files\n", stats.smallfiles, stats.largefiles)
268 }
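
Note for readers unfamiliar with the dirwalk package: below is a minimal, self-contained sketch of how an observer like NullWalker plugs into a walk. The observer method set (SmallFile/LargeFile/Error/Finished) and the WalkBasic(dir, smallfilesize, obs) signature are inferred from this file only and may not match the package exactly; treat it as illustrative rather than as the package's documented API.

package main

import (
	"log"
	"sync/atomic"

	"github.com/luci/luci-go/common/dirwalk"
)

// countObserver mirrors NullWalker above: it only counts files of each type.
type countObserver struct {
	small, large uint64
}

// SmallFile receives the full contents of files at or below the small-file threshold.
func (c *countObserver) SmallFile(filename string, alldata []byte) { atomic.AddUint64(&c.small, 1) }

// LargeFile receives only the filename for files above the threshold.
func (c *countObserver) LargeFile(filename string) { atomic.AddUint64(&c.large, 1) }

func (c *countObserver) Error(pathname string, err error) { log.Printf("%s: %s", pathname, err) }

func (c *countObserver) Finished() {}

func main() {
	obs := &countObserver{}
	// 64*1024 matches the default of the -smallfilesize flag defined above.
	dirwalk.WalkBasic(".", 64*1024, obs)
	log.Printf("small=%d large=%d", obs.small, obs.large)
}

The tool itself would be driven through the flags defined at the top of the file, e.g. something like: walkdir -dir /some/path -method simple -do nothing -repeat 3.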
