 Chromium Code Reviews
 Chromium Code Reviews Issue 2054763004:
  luci-go/common/dirwalk: Code for walking a directory tree efficiently 
  Base URL: https://github.com/luci/luci-go@master
    
  
    Issue 2054763004:
  luci-go/common/dirwalk: Code for walking a directory tree efficiently 
  Base URL: https://github.com/luci/luci-go@master| Index: common/dirwalk/tests/tools/gendir/generate.go | 
| diff --git a/common/dirwalk/tests/tools/gendir/generate.go b/common/dirwalk/tests/tools/gendir/generate.go | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..884384de8614f61d7d2ba7785d20c88803bf1832 | 
| --- /dev/null | 
| +++ b/common/dirwalk/tests/tools/gendir/generate.go | 
| @@ -0,0 +1,246 @@ | 
| +// Copyright 2016 The LUCI Authors. All rights reserved. | 
| +// Use of this source code is governed under the Apache License, Version 2.0 | 
| +// that can be found in the LICENSE file. | 
| + | 
| +package main | 
| + | 
| +// Tools for generating test directories. | 
| + | 
| +import ( | 
| + "fmt" | 
| + "log" | 
| + "math/rand" | 
| + "os" | 
| + "path" | 
| + | 
| + "github.com/dustin/go-humanize" | 
| +) | 
| + | 
// min returns the smaller of a and b.
func min(a uint64, b uint64) uint64 {
	if a > b {
		return b
	}
	return a
}
// max returns the larger of a and b.
func max(a uint64, b uint64) uint64 {
	if a < b {
		return b
	}
	return a
}
| + | 
// randChar picks one rune from runes, using r to choose the index.
// It panics if runes is empty, because r.Intn requires a positive bound.
func randChar(r *rand.Rand, runes []rune) rune {
	idx := r.Intn(len(runes))
	return runes[idx]
}
| + | 
// randStr builds a string of exactly length runes, each one picked from
// runes using r. A length of zero yields the empty string without consuming
// any randomness.
func randStr(r *rand.Rand, length uint64, runes []rune) string {
	picked := make([]rune, length)
	for pos := 0; pos < len(picked); pos++ {
		// One r.Intn draw per output rune.
		picked[pos] = runes[r.Intn(len(runes))]
	}
	return string(picked)
}
| + | 
// randBetween returns a pseudo-random value drawn from r in the half-open
// range [min, max): min is a possible result, max is not.
//
// When the range is empty or inverted (max <= min) it returns min without
// consuming any randomness, instead of panicking inside Int63n.
func randBetween(r *rand.Rand, min uint64, max uint64) uint64 {
	if max <= min {
		return min
	}

	// Largest span that Int63n can handle (its argument is an int64).
	const maxInt63 = 1<<63 - 1

	span := max - min
	if span > maxInt63 {
		// Converting span to int64 would produce a negative number and make
		// Int63n panic. Use rejection sampling on the full 64-bit output
		// instead; more than half of all draws are accepted, so this loop
		// terminates quickly.
		for {
			if v := r.Uint64(); v < span {
				return v + min
			}
		}
	}
	return uint64(r.Int63n(int64(span))) + min
}
| + | 
// filenameChars is the alphabet used for generated file and directory names:
// ASCII letters, digits and '-'.
//
// FIXME: Maybe include some non-ASCII (multi-byte UTF-8) characters as well?
var filenameChars = []rune(
	"abcdefghijklmnopqrstuvwxyz" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"0123456789" +
		"-",
)
| + | 
| +func filenameRandom(r *rand.Rand, length uint64) string { | 
| + return randStr(r, length, filenameChars) | 
| +} | 
| + | 
// DirGen is the interface for directory generators.
//
// NOTE(review): no implementation is visible in this file; presumably seed
// selects the random sequence and num the amount of content to create --
// confirm against the implementations.
type DirGen interface {
	// Create generates content from the given seed and count.
	Create(seed uint64, num int)
}
| + | 
// FileType selects the kind of content written into a generated file. The
// kinds differ mainly in how well the resulting data compresses.
type FileType int

const (
	FILETYPE_BIN_RAND   FileType = iota // Truly random binary data (totally uncompressible)
	FILETYPE_TXT_RAND                   // Truly random text data (mostly uncompressible)
	FILETYPE_BIN_REPEAT                 // Repeated binary data (compressible)
	FILETYPE_TXT_REPEAT                 // Repeated text data (very compressible)
	FILETYPE_TXT_LOREM                  // Lorem Ipsum text data (very compressible)

	// FILETYPE_MAX is the number of valid file types; it is not itself a
	// valid FileType.
	FILETYPE_MAX
)

// FileTypeName holds the human readable name of every valid FileType,
// indexed by the FileType value.
var FileTypeName = []string{
	FILETYPE_BIN_RAND:   "Random Binary",
	FILETYPE_TXT_RAND:   "Random Text",
	FILETYPE_BIN_REPEAT: "Repeated Binary",
	FILETYPE_TXT_REPEAT: "Repeated Text",
	FILETYPE_TXT_LOREM:  "Lorem Text",
}

// String returns the human readable name of f. Values without an entry in
// FileTypeName (negative values, FILETYPE_MAX and above) are rendered as
// "FileType(N)" instead of panicking with an index out of range.
func (f FileType) String() string {
	if f < 0 || int(f) >= len(FileTypeName) {
		return fmt.Sprintf("FileType(%d)", int(f))
	}
	return FileTypeName[f]
}
| + | 
| +// FIXME: Maybe some UTF-8 characters? | 
| 
M-A Ruel
2016/09/15 14:31:02
It depends; for file paths, you want UTF-8. For co
 | 
// textChars is the alphabet used for generated text content: ASCII letters,
// digits, '-' and '.'.
var textChars = []rune(
	"abcdefghijklmnopqrstuvwxyz" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"0123456789" +
		"-.",
)
| + | 
const (
	// BLOCKSIZE is the largest amount of file content generated and written
	// in one go: 1 MiB.
	BLOCKSIZE uint64 = 1 << 20

	// SEQUENCE_MINSIZE and SEQUENCE_MAXSIZE bound the length of the sequence
	// that is repeated to fill "repeat" style files: from 16 up to 4 KiB.
	SEQUENCE_MINSIZE uint64 = 1 << 4
	SEQUENCE_MAXSIZE uint64 = 4 << 10
)
| + | 
| +func writeFile(r *rand.Rand, filename string, filetype FileType, filesize uint64) { | 
| + f, err := os.Create(filename) | 
| + if err != nil { | 
| + log.Fatal(err) | 
| 
M-A Ruel
2016/09/15 14:31:02
return an error, no need to crash.
 
mithro
2016/09/20 12:41:44
We can't really recover in this case?
We depend o
 
M-A Ruel
2016/09/20 16:37:27
You return the error, the error is surfaced to the
 
mithro
2016/09/22 11:19:58
Bubbling that error all the way up to the top seem
 | 
| + } | 
| + defer f.Close() | 
| + | 
| + var written uint64 = 0 | 
| + for written < filesize { | 
| + content := make([]byte, min(filesize-written, BLOCKSIZE)) | 
| + | 
| + // Generate a block of content | 
| + switch filetype { | 
| + case FILETYPE_BIN_RAND: | 
| 
M-A Ruel
2016/09/15 14:31:02
You should create a generator so basically:
// G
 
mithro
2016/09/20 12:41:44
There was a specific reason I didn't do a generato
 
mithro
2016/09/22 11:19:58
Done.
 | 
| + r.Read(content) | 
| + | 
| + case FILETYPE_TXT_RAND: | 
| + // Runes can be multiple bytes long | 
| + for i := 0; i < len(content); { | 
| + bytes := []byte(string(randChar(r, textChars))) | 
| + for j := range bytes { | 
| + content[i+j] = bytes[j] | 
| + } | 
| + i += len(bytes) | 
| + } | 
| + | 
| + case FILETYPE_BIN_REPEAT: | 
| + var sequence []byte = make([]byte, randBetween(r, SEQUENCE_MINSIZE, SEQUENCE_MAXSIZE)) | 
| + r.Read(sequence) | 
| + | 
| + for i := range content { | 
| + content[i] = sequence[i%len(sequence)] | 
| + } | 
| + | 
| + case FILETYPE_TXT_REPEAT, FILETYPE_TXT_LOREM: | 
| + var sequence []byte | 
| + | 
| + switch filetype { | 
| + case FILETYPE_TXT_REPEAT: | 
| + // FIXME: As runes can be multiple bytes long, this could technical | 
| + // be longer then SEQUENCE_MAXSIZE, but don't think we care? | 
| + sequence = []byte(randStr(r, randBetween(r, SEQUENCE_MINSIZE, SEQUENCE_MAXSIZE), textChars)) | 
| + case FILETYPE_TXT_LOREM: | 
| + sequence = []byte(lorem) | 
| + } | 
| + | 
| + for i := range content { | 
| + content[i] = sequence[i%len(sequence)] | 
| + } | 
| + } | 
| + f.Write(content) | 
| + written += uint64(len(content)) | 
| + } | 
| +} | 
| + | 
// FILENAME_MINSIZE is the lower bound used when picking the length of a
// generated file or directory name.
const FILENAME_MINSIZE uint64 = 4

// FILENAME_MAXSIZE is the upper bound used when picking the length of a
// generated file or directory name.
const FILENAME_MAXSIZE uint64 = 20
| + | 
| +// Generate num files between (min, max) size | 
| +func GenerateFiles(r *rand.Rand, dir string, num uint64, filesize_min uint64, filesize_max uint64) { | 
| 
M-A Ruel
2016/09/15 14:31:02
One thing I care a lot about is to ensure that the
 
mithro
2016/09/20 12:41:44
This kind of function kind of exists at the bottom
 | 
| + for i := uint64(0); i < num; i++ { | 
| + var filename string | 
| + var filepath string | 
| + for true { | 
| + filename = filenameRandom(r, randBetween(r, FILENAME_MINSIZE, FILENAME_MAXSIZE)) | 
| + filepath = path.Join(dir, filename) | 
| + if _, err := os.Stat(filepath); os.IsNotExist(err) { | 
| + break | 
| + } | 
| + } | 
| + filetype := FileType(r.Intn(int(FILETYPE_MAX))) | 
| + filesize := randBetween(r, filesize_min, filesize_max) | 
| + | 
| + if num < 1000 { | 
| + fmt.Printf("File: %-40s %-20s (%s)\n", filename, filetype.String(), humanize.Bytes(filesize)) | 
| + } | 
| + writeFile(r, filepath, filetype, filesize) | 
| + } | 
| +} | 
| + | 
| +// Generate num directories | 
| +func GenerateDirs(r *rand.Rand, dir string, num uint64) []string { | 
| + var result []string | 
| + | 
| + for i := uint64(0); i < num; i++ { | 
| + var dirname string | 
| + var dirpath string | 
| + for true { | 
| + dirname = filenameRandom(r, randBetween(r, FILENAME_MINSIZE, FILENAME_MAXSIZE)) | 
| + dirpath = path.Join(dir, dirname) | 
| + if _, err := os.Stat(dirpath); os.IsNotExist(err) { | 
| + break | 
| + } | 
| + } | 
| + | 
| + if err := os.MkdirAll(dirpath, 0755); err != nil { | 
| + log.Fatal(err) | 
| 
M-A Ruel
2016/09/20 16:37:27
same here.
 | 
| + } | 
| + result = append(result, dirpath) | 
| + } | 
| + return result | 
| +} | 
| + | 
// FileSettings describes one batch of files to generate in a directory.
type FileSettings struct {
	// MinNumber and MaxNumber bound how many files the batch contains.
	MinNumber, MaxNumber uint64
	// MinSize and MaxSize bound the size of each file, in bytes.
	MinSize, MaxSize uint64
}
| + | 
// DirSettings describes the directory layout of a generated tree.
type DirSettings struct {
	// Number[d] is how many sub-directories every directory at depth d
	// gets; directories at depth len(Number) or deeper get none.
	Number []uint64

	// MinFileDepth is the shallowest depth at which files are generated.
	MinFileDepth uint64
}
| + | 
| +type TreeSettings struct { | 
| + Files []FileSettings | 
| + Dir DirSettings | 
| +} | 
| + | 
| +func generateTreeInternal(r *rand.Rand, dir string, depth uint64, settings *TreeSettings) { | 
| + fmt.Printf("%04d:%s -->\n", depth, dir) | 
| + // Generate the files in this directory | 
| + if depth >= settings.Dir.MinFileDepth { | 
| + for _, files := range settings.Files { | 
| + numfiles := randBetween(r, files.MinNumber, files.MaxNumber) | 
| + fmt.Printf("%04d:%s: Generating %d files (between %s and %s)\n", depth, dir, numfiles, humanize.Bytes(files.MinSize), humanize.Bytes(files.MaxSize)) | 
| + GenerateFiles(r, dir, numfiles, files.MinSize, files.MaxSize) | 
| + } | 
| + } | 
| + | 
| + // Generate another depth of directories | 
| + if depth < uint64(len(settings.Dir.Number)) { | 
| + numdirs := settings.Dir.Number[depth] | 
| + fmt.Printf("%04d:%s: Generating %d directories\n", depth, dir, numdirs) | 
| + for _, childpath := range GenerateDirs(r, dir, numdirs) { | 
| + generateTreeInternal(r, childpath, depth+1, settings) | 
| + } | 
| + } | 
| + fmt.Printf("%04d:%s <--\n", depth, dir) | 
| +} | 
| + | 
| +func GenerateTree(r *rand.Rand, rootdir string, settings *TreeSettings) { | 
| + generateTreeInternal(r, rootdir, 0, settings) | 
| + return | 
| +} |