Index: common/dirwalk/tests/tools/gendir/generate.go |
diff --git a/common/dirwalk/tests/tools/gendir/generate.go b/common/dirwalk/tests/tools/gendir/generate.go |
new file mode 100644 |
index 0000000000000000000000000000000000000000..884384de8614f61d7d2ba7785d20c88803bf1832 |
--- /dev/null |
+++ b/common/dirwalk/tests/tools/gendir/generate.go |
@@ -0,0 +1,246 @@ |
+// Copyright 2016 The LUCI Authors. All rights reserved. |
+// Use of this source code is governed under the Apache License, Version 2.0 |
+// that can be found in the LICENSE file. |
+ |
+package main |
+ |
+// Tools for generating test directories. |
+ |
+import ( |
+ "fmt" |
+ "log" |
+ "math/rand" |
+ "os" |
+ "path" |
+ |
+ "github.com/dustin/go-humanize" |
+) |
+ |
+func min(a uint64, b uint64) uint64 { |
+ if a > b { |
+ return b |
+ } else { |
M-A Ruel
2016/09/15 14:31:02
}
no need for else
same below
mithro
2016/09/20 12:41:44
Done.
|
+ return a |
+ } |
+} |
+func max(a uint64, b uint64) uint64 { |
+ if a < b { |
+ return b |
+ } else { |
+ return a |
+ } |
+} |
+ |
+func randChar(r *rand.Rand, runes []rune) rune { |
+ return runes[r.Intn(len(runes))] |
+} |
+ |
+func randStr(r *rand.Rand, length uint64, runes []rune) string { |
M-A Ruel
2016/09/15 14:31:02
I'd prefer length to be int.
It's a bit confusing
mithro
2016/09/20 12:41:44
Negative length isn't valid though?
M-A Ruel
2016/09/20 16:37:27
uint is sparingly used in Go, it's really only use
|
+ str := make([]rune, length) |
+ for i := range str { |
+ str[i] = randChar(r, runes) |
+ } |
+ return string(str) |
+} |
+ |
+func randBetween(r *rand.Rand, min uint64, max uint64) uint64 { |
+ if min == max { |
+ return min |
+ } |
+ return uint64(r.Int63n(int64(max-min))) + min |
+} |
+ |
+// FIXME: Maybe some UTF-8 characters? |
+var filenameChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-") |
+ |
+func filenameRandom(r *rand.Rand, length uint64) string { |
+ return randStr(r, length, filenameChars) |
+} |
+ |
+type DirGen interface { |
+ Create(seed uint64, num int) |
+} |
+ |
+type FileType int |
M-A Ruel
2016/09/15 14:31:02
I'd prefer to call it Compressibility and have a N
mithro
2016/09/20 12:41:44
I think it is the *type* which is the important fa
|
+ |
+const ( |
+ FILETYPE_BIN_RAND FileType = iota // Truly random binary data (totally uncompressible) |
+ FILETYPE_TXT_RAND // Truly random text data (mostly uncompressible) |
+ FILETYPE_BIN_REPEAT // Repeated binary data (compressible) |
+ FILETYPE_TXT_REPEAT // Repeated text data (very compressible) |
+ FILETYPE_TXT_LOREM // Lorem Ipsum txt data (very compressible) |
+ |
+ FILETYPE_MAX |
+) |
+ |
+var FileTypeName []string = []string{ |
+ "Random Binary", |
+ "Random Text", |
+ "Repeated Binary", |
+ "Repeated Text", |
+ "Lorem Text", |
+} |
+ |
+func (f FileType) String() string { |
+ return FileTypeName[int(f)] |
+} |
+ |
+// FIXME: Maybe some UTF-8 characters? |
M-A Ruel
2016/09/15 14:31:02
It depends; for file paths, you want UTF-8. For co
|
+var textChars = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-.") |
+ |
+const ( |
+ BLOCKSIZE uint64 = 1 * 1024 * 1024 // 1 Megabyte |
+ |
+ // Maximum 4k long repeated sequences |
+ SEQUENCE_MINSIZE uint64 = 16 |
+ SEQUENCE_MAXSIZE uint64 = 4 * 1024 |
+) |
+ |
+func writeFile(r *rand.Rand, filename string, filetype FileType, filesize uint64) { |
+ f, err := os.Create(filename) |
+ if err != nil { |
+ log.Fatal(err) |
M-A Ruel
2016/09/15 14:31:02
return an error, no need to crash.
mithro
2016/09/20 12:41:44
We can't really recover in this case?
We depend o
M-A Ruel
2016/09/20 16:37:27
You return the error, the error is surfaced to the
mithro
2016/09/22 11:19:58
Bubbling that error all the way up to the top seem
|
+ } |
+ defer f.Close() |
+ |
+ var written uint64 = 0 |
+ for written < filesize { |
+ content := make([]byte, min(filesize-written, BLOCKSIZE)) |
+ |
+ // Generate a block of content |
+ switch filetype { |
+ case FILETYPE_BIN_RAND: |
M-A Ruel
2016/09/15 14:31:02
You should create a generator so basically:
// G
mithro
2016/09/20 12:41:44
There was a specific reason I didn't do a generato
mithro
2016/09/22 11:19:58
Done.
|
+ r.Read(content) |
+ |
+ case FILETYPE_TXT_RAND: |
+ // Runes can be multiple bytes long |
+ for i := 0; i < len(content); { |
+ bytes := []byte(string(randChar(r, textChars))) |
+ for j := range bytes { |
+ content[i+j] = bytes[j] |
+ } |
+ i += len(bytes) |
+ } |
+ |
+ case FILETYPE_BIN_REPEAT: |
+ var sequence []byte = make([]byte, randBetween(r, SEQUENCE_MINSIZE, SEQUENCE_MAXSIZE)) |
+ r.Read(sequence) |
+ |
+ for i := range content { |
+ content[i] = sequence[i%len(sequence)] |
+ } |
+ |
+ case FILETYPE_TXT_REPEAT, FILETYPE_TXT_LOREM: |
+ var sequence []byte |
+ |
+ switch filetype { |
+ case FILETYPE_TXT_REPEAT: |
+ // FIXME: As runes can be multiple bytes long, this could technical |
+ // be longer then SEQUENCE_MAXSIZE, but don't think we care? |
+ sequence = []byte(randStr(r, randBetween(r, SEQUENCE_MINSIZE, SEQUENCE_MAXSIZE), textChars)) |
+ case FILETYPE_TXT_LOREM: |
+ sequence = []byte(lorem) |
+ } |
+ |
+ for i := range content { |
+ content[i] = sequence[i%len(sequence)] |
+ } |
+ } |
+ f.Write(content) |
+ written += uint64(len(content)) |
+ } |
+} |
+ |
+const ( |
+ FILENAME_MINSIZE uint64 = 4 |
+ FILENAME_MAXSIZE uint64 = 20 |
+) |
+ |
+// Generate num files between (min, max) size |
+func GenerateFiles(r *rand.Rand, dir string, num uint64, filesize_min uint64, filesize_max uint64) { |
M-A Ruel
2016/09/15 14:31:02
One thing I care a lot about is to ensure that the
mithro
2016/09/20 12:41:44
This kind of function kind of exists at the bottom
|
+ for i := uint64(0); i < num; i++ { |
+ var filename string |
+ var filepath string |
+ for true { |
+ filename = filenameRandom(r, randBetween(r, FILENAME_MINSIZE, FILENAME_MAXSIZE)) |
+ filepath = path.Join(dir, filename) |
+ if _, err := os.Stat(filepath); os.IsNotExist(err) { |
+ break |
+ } |
+ } |
+ filetype := FileType(r.Intn(int(FILETYPE_MAX))) |
+ filesize := randBetween(r, filesize_min, filesize_max) |
+ |
+ if num < 1000 { |
+ fmt.Printf("File: %-40s %-20s (%s)\n", filename, filetype.String(), humanize.Bytes(filesize)) |
+ } |
+ writeFile(r, filepath, filetype, filesize) |
+ } |
+} |
+ |
+// Generate num directories |
+func GenerateDirs(r *rand.Rand, dir string, num uint64) []string { |
+ var result []string |
+ |
+ for i := uint64(0); i < num; i++ { |
+ var dirname string |
+ var dirpath string |
+ for true { |
+ dirname = filenameRandom(r, randBetween(r, FILENAME_MINSIZE, FILENAME_MAXSIZE)) |
+ dirpath = path.Join(dir, dirname) |
+ if _, err := os.Stat(dirpath); os.IsNotExist(err) { |
+ break |
+ } |
+ } |
+ |
+ if err := os.MkdirAll(dirpath, 0755); err != nil { |
+ log.Fatal(err) |
M-A Ruel
2016/09/20 16:37:27
same here.
|
+ } |
+ result = append(result, dirpath) |
+ } |
+ return result |
+} |
+ |
+type FileSettings struct { |
+ MinNumber uint64 |
+ MaxNumber uint64 |
+ MinSize uint64 |
+ MaxSize uint64 |
+} |
+ |
+type DirSettings struct { |
+ Number []uint64 |
+ MinFileDepth uint64 |
+} |
+ |
+type TreeSettings struct { |
+ Files []FileSettings |
+ Dir DirSettings |
+} |
+ |
+func generateTreeInternal(r *rand.Rand, dir string, depth uint64, settings *TreeSettings) { |
+ fmt.Printf("%04d:%s -->\n", depth, dir) |
+ // Generate the files in this directory |
+ if depth >= settings.Dir.MinFileDepth { |
+ for _, files := range settings.Files { |
+ numfiles := randBetween(r, files.MinNumber, files.MaxNumber) |
+ fmt.Printf("%04d:%s: Generating %d files (between %s and %s)\n", depth, dir, numfiles, humanize.Bytes(files.MinSize), humanize.Bytes(files.MaxSize)) |
+ GenerateFiles(r, dir, numfiles, files.MinSize, files.MaxSize) |
+ } |
+ } |
+ |
+ // Generate another depth of directories |
+ if depth < uint64(len(settings.Dir.Number)) { |
+ numdirs := settings.Dir.Number[depth] |
+ fmt.Printf("%04d:%s: Generating %d directories\n", depth, dir, numdirs) |
+ for _, childpath := range GenerateDirs(r, dir, numdirs) { |
+ generateTreeInternal(r, childpath, depth+1, settings) |
+ } |
+ } |
+ fmt.Printf("%04d:%s <--\n", depth, dir) |
+} |
+ |
+func GenerateTree(r *rand.Rand, rootdir string, settings *TreeSettings) { |
+ generateTreeInternal(r, rootdir, 0, settings) |
+ return |
+} |