Chromium Code Reviews| Index: common/dirwalk/tests/tools/gendir/generate.go |
| diff --git a/common/dirwalk/tests/tools/gendir/generate.go b/common/dirwalk/tests/tools/gendir/generate.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..884384de8614f61d7d2ba7785d20c88803bf1832 |
| --- /dev/null |
| +++ b/common/dirwalk/tests/tools/gendir/generate.go |
| @@ -0,0 +1,246 @@ |
| +// Copyright 2016 The LUCI Authors. All rights reserved. |
| +// Use of this source code is governed under the Apache License, Version 2.0 |
| +// that can be found in the LICENSE file. |
| + |
| +package main |
| + |
| +// Tools for generating test directories. |
| + |
| +import ( |
| + "fmt" |
| + "log" |
| + "math/rand" |
| + "os" |
| + "path" |
| + |
| + "github.com/dustin/go-humanize" |
| +) |
| + |
// min returns the smaller of a and b.
func min(a uint64, b uint64) uint64 {
	if a > b {
		return b
	}
	return a
}
// max returns the larger of a and b.
func max(a uint64, b uint64) uint64 {
	if a < b {
		return b
	}
	return a
}
| + |
// randChar picks one rune from runes, using r to choose the index.
//
// runes must not be empty: rand.Intn panics when its bound is zero.
func randChar(r *rand.Rand, runes []rune) rune {
	idx := r.Intn(len(runes))
	return runes[idx]
}
| + |
// randStr returns a string made of exactly length runes, each one chosen
// independently from runes using r.
//
// runes must not be empty unless length is zero, because rand.Intn panics on
// a zero bound.
//
// NOTE(review): review feedback preferred an int length; uint64 is kept here
// so the signature stays unchanged for the callers in this file.
func randStr(r *rand.Rand, length uint64, runes []rune) string {
	picked := make([]rune, 0, length)
	for uint64(len(picked)) < length {
		picked = append(picked, runes[r.Intn(len(runes))])
	}
	return string(picked)
}
| + |
// randBetween returns a value drawn with r from the half-open range
// [min, max): min can be returned, max cannot.
//
// When the range is empty (max <= min) it returns min without consuming any
// randomness. Previously max < min made the unsigned subtraction wrap around
// and rand.Int63n panic on the resulting non-positive bound.
//
// max-min must fit in an int64, since rand.Int63n takes an int64 bound.
func randBetween(r *rand.Rand, min uint64, max uint64) uint64 {
	if max <= min {
		return min
	}
	return uint64(r.Int63n(int64(max-min))) + min
}
| + |
// filenameChars is the alphabet that filenameRandom draws from: ASCII
// letters, digits and '-'.
// FIXME: Maybe some UTF-8 characters?
var filenameChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-")
| + |
| +func filenameRandom(r *rand.Rand, length uint64) string { |
| + return randStr(r, length, filenameChars) |
| +} |
| + |
// DirGen is the interface for something that can create generated directory
// content via Create.
//
// NOTE(review): no implementation or caller is visible in this file, so the
// exact meaning of seed and num is not established here; confirm with the
// implementers.
type DirGen interface {
	Create(seed uint64, num int)
}
| + |
// FileType selects which kind of content writeFile generates for a file.
type FileType int
|
M-A Ruel
2016/09/15 14:31:02
I'd prefer to call it Compressibility and have a N
mithro
2016/09/20 12:41:44
I think it is the *type* which is the important fa
|
| + |
// The kinds of generated file content. The values double as indexes into
// FileTypeName, so the two must be kept in the same order.
const (
	FILETYPE_BIN_RAND   FileType = iota // Truly random binary data (totally incompressible)
	FILETYPE_TXT_RAND                   // Truly random text data (mostly incompressible)
	FILETYPE_BIN_REPEAT                 // Repeated binary data (compressible)
	FILETYPE_TXT_REPEAT                 // Repeated text data (very compressible)
	FILETYPE_TXT_LOREM                  // Lorem Ipsum text data (very compressible)

	// FILETYPE_MAX is the number of real file types. GenerateFiles uses it as
	// the exclusive upper bound when picking a type at random; it is not a
	// valid type itself.
	FILETYPE_MAX
)
| + |
// FileTypeName holds the human-readable label of every FileType, indexed by
// the FileType's integer value.
var FileTypeName = []string{
	"Random Binary",
	"Random Text",
	"Repeated Binary",
	"Repeated Text",
	"Lorem Text",
}
| + |
| +func (f FileType) String() string { |
| + return FileTypeName[int(f)] |
| +} |
| + |
| +// FIXME: Maybe some UTF-8 characters? |
|
M-A Ruel
2016/09/15 14:31:02
It depends; for file paths, you want UTF-8. For co
|
// textChars is the alphabet used for generated text content: ASCII letters,
// digits, '-' and '.'.
var textChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-.")
| + |
// Sizing parameters for writeFile.
const (
	// BLOCKSIZE is the largest chunk writeFile generates and writes at once.
	BLOCKSIZE uint64 = 1 * 1024 * 1024 // 1 MiB

	// Bounds on the length of the sequence that is repeated to fill the
	// "repeat" file types. They are handed to randBetween, so the length is at
	// least SEQUENCE_MINSIZE and strictly less than SEQUENCE_MAXSIZE (4 KiB).
	SEQUENCE_MINSIZE uint64 = 16
	SEQUENCE_MAXSIZE uint64 = 4 * 1024
)
| + |
| +func writeFile(r *rand.Rand, filename string, filetype FileType, filesize uint64) { |
| + f, err := os.Create(filename) |
| + if err != nil { |
| + log.Fatal(err) |
|
M-A Ruel
2016/09/15 14:31:02
return an error, no need to crash.
mithro
2016/09/20 12:41:44
We can't really recover in this case?
We depend o
M-A Ruel
2016/09/20 16:37:27
You return the error, the error is surfaced to the
mithro
2016/09/22 11:19:58
Bubbling that error all the way up to the top seem
|
| + } |
| + defer f.Close() |
| + |
| + var written uint64 = 0 |
| + for written < filesize { |
| + content := make([]byte, min(filesize-written, BLOCKSIZE)) |
| + |
| + // Generate a block of content |
| + switch filetype { |
| + case FILETYPE_BIN_RAND: |
|
M-A Ruel
2016/09/15 14:31:02
You should create a generator so basically:
// G
mithro
2016/09/20 12:41:44
There was a specific reason I didn't do a generato
mithro
2016/09/22 11:19:58
Done.
|
| + r.Read(content) |
| + |
| + case FILETYPE_TXT_RAND: |
| + // Runes can be multiple bytes long |
| + for i := 0; i < len(content); { |
| + bytes := []byte(string(randChar(r, textChars))) |
| + for j := range bytes { |
| + content[i+j] = bytes[j] |
| + } |
| + i += len(bytes) |
| + } |
| + |
| + case FILETYPE_BIN_REPEAT: |
| + var sequence []byte = make([]byte, randBetween(r, SEQUENCE_MINSIZE, SEQUENCE_MAXSIZE)) |
| + r.Read(sequence) |
| + |
| + for i := range content { |
| + content[i] = sequence[i%len(sequence)] |
| + } |
| + |
| + case FILETYPE_TXT_REPEAT, FILETYPE_TXT_LOREM: |
| + var sequence []byte |
| + |
| + switch filetype { |
| + case FILETYPE_TXT_REPEAT: |
| + // FIXME: As runes can be multiple bytes long, this could technical |
| + // be longer then SEQUENCE_MAXSIZE, but don't think we care? |
| + sequence = []byte(randStr(r, randBetween(r, SEQUENCE_MINSIZE, SEQUENCE_MAXSIZE), textChars)) |
| + case FILETYPE_TXT_LOREM: |
| + sequence = []byte(lorem) |
| + } |
| + |
| + for i := range content { |
| + content[i] = sequence[i%len(sequence)] |
| + } |
| + } |
| + f.Write(content) |
| + written += uint64(len(content)) |
| + } |
| +} |
| + |
// Bounds on the length of generated file and directory names. They are handed
// to randBetween, so a name has at least FILENAME_MINSIZE and strictly fewer
// than FILENAME_MAXSIZE characters.
const (
	FILENAME_MINSIZE uint64 = 4
	FILENAME_MAXSIZE uint64 = 20
)
| + |
| +// Generate num files between (min, max) size |
| +func GenerateFiles(r *rand.Rand, dir string, num uint64, filesize_min uint64, filesize_max uint64) { |
|
M-A Ruel
2016/09/15 14:31:02
One thing I care a lot about is to ensure that the
mithro
2016/09/20 12:41:44
This kind of function kind of exists at the bottom
|
| + for i := uint64(0); i < num; i++ { |
| + var filename string |
| + var filepath string |
| + for true { |
| + filename = filenameRandom(r, randBetween(r, FILENAME_MINSIZE, FILENAME_MAXSIZE)) |
| + filepath = path.Join(dir, filename) |
| + if _, err := os.Stat(filepath); os.IsNotExist(err) { |
| + break |
| + } |
| + } |
| + filetype := FileType(r.Intn(int(FILETYPE_MAX))) |
| + filesize := randBetween(r, filesize_min, filesize_max) |
| + |
| + if num < 1000 { |
| + fmt.Printf("File: %-40s %-20s (%s)\n", filename, filetype.String(), humanize.Bytes(filesize)) |
| + } |
| + writeFile(r, filepath, filetype, filesize) |
| + } |
| +} |
| + |
| +// Generate num directories |
| +func GenerateDirs(r *rand.Rand, dir string, num uint64) []string { |
| + var result []string |
| + |
| + for i := uint64(0); i < num; i++ { |
| + var dirname string |
| + var dirpath string |
| + for true { |
| + dirname = filenameRandom(r, randBetween(r, FILENAME_MINSIZE, FILENAME_MAXSIZE)) |
| + dirpath = path.Join(dir, dirname) |
| + if _, err := os.Stat(dirpath); os.IsNotExist(err) { |
| + break |
| + } |
| + } |
| + |
| + if err := os.MkdirAll(dirpath, 0755); err != nil { |
| + log.Fatal(err) |
|
M-A Ruel
2016/09/20 16:37:27
same here.
|
| + } |
| + result = append(result, dirpath) |
| + } |
| + return result |
| +} |
| + |
// FileSettings describes one batch of files that generateTreeInternal creates
// in every eligible directory.
type FileSettings struct {
	// MinNumber and MaxNumber are passed to randBetween to choose how many
	// files the batch contains.
	MinNumber uint64
	MaxNumber uint64
	// MinSize and MaxSize are passed to GenerateFiles as the size bounds, in
	// bytes, of each file in the batch.
	MinSize uint64
	MaxSize uint64
}
| + |
// DirSettings describes the directory layout of a generated tree.
type DirSettings struct {
	// Number[d] is how many subdirectories are created inside each directory
	// at depth d (the root is depth 0). Directories at depth len(Number) or
	// deeper get no subdirectories.
	Number []uint64
	// MinFileDepth is the shallowest depth at which files are generated;
	// directories above it contain only subdirectories.
	MinFileDepth uint64
}
| + |
// TreeSettings bundles everything GenerateTree needs to build a tree.
type TreeSettings struct {
	// Files lists the file batches; every entry is applied to each directory
	// that is deep enough to receive files.
	Files []FileSettings
	// Dir controls how many directories exist per level and where files start.
	Dir DirSettings
}
| + |
| +func generateTreeInternal(r *rand.Rand, dir string, depth uint64, settings *TreeSettings) { |
| + fmt.Printf("%04d:%s -->\n", depth, dir) |
| + // Generate the files in this directory |
| + if depth >= settings.Dir.MinFileDepth { |
| + for _, files := range settings.Files { |
| + numfiles := randBetween(r, files.MinNumber, files.MaxNumber) |
| + fmt.Printf("%04d:%s: Generating %d files (between %s and %s)\n", depth, dir, numfiles, humanize.Bytes(files.MinSize), humanize.Bytes(files.MaxSize)) |
| + GenerateFiles(r, dir, numfiles, files.MinSize, files.MaxSize) |
| + } |
| + } |
| + |
| + // Generate another depth of directories |
| + if depth < uint64(len(settings.Dir.Number)) { |
| + numdirs := settings.Dir.Number[depth] |
| + fmt.Printf("%04d:%s: Generating %d directories\n", depth, dir, numdirs) |
| + for _, childpath := range GenerateDirs(r, dir, numdirs) { |
| + generateTreeInternal(r, childpath, depth+1, settings) |
| + } |
| + } |
| + fmt.Printf("%04d:%s <--\n", depth, dir) |
| +} |
| + |
| +func GenerateTree(r *rand.Rand, rootdir string, settings *TreeSettings) { |
| + generateTreeInternal(r, rootdir, 0, settings) |
| + return |
| +} |