 Chromium Code Reviews
 Chromium Code Reviews Issue 2054763004:
  luci-go/common/dirwalk: Code for walking a directory tree efficiently 
  Base URL: https://github.com/luci/luci-go@master
    
  
    Issue 2054763004:
  luci-go/common/dirwalk: Code for walking a directory tree efficiently 
  Base URL: https://github.com/luci/luci-go@master| Index: common/dirwalk/walk_nostat.go | 
| diff --git a/common/dirwalk/walk_nostat.go b/common/dirwalk/walk_nostat.go | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..bcf810d83b903be201ea4a1f5ab52565f65ab3d3 | 
| --- /dev/null | 
| +++ b/common/dirwalk/walk_nostat.go | 
| @@ -0,0 +1,69 @@ | 
| +// Copyright 2016 The LUCI Authors. All rights reserved. | 
| +// Use of this source code is governed under the Apache License, Version 2.0 | 
| +// that can be found in the LICENSE file. | 
| + | 
| +package dirwalk | 
| + | 
| +import ( | 
| + "bytes" | 
| + "io" | 
| + "io/ioutil" | 
| + "os" | 
| + "path/filepath" | 
| +) | 
| + | 
| +func walkNoStatInternal(base string, files []string, smallFileLimit int64, callback WalkFunc) { | 
| + for _, name := range files { | 
| + path := filepath.Join(base, name) | 
| + | 
| + file, err := os.Open(path) | 
| + if err != nil { | 
| + callback(path, -1, nil, err) | 
| + continue | 
| + } | 
| + | 
| + block := make([]byte, smallFileLimit) | 
| + count, err := file.Read(block) | 
| + if err != io.EOF && err != nil { | 
| 
mcgreevy_g
2017/06/27 03:29:17
Note: it's valid for a Reader to return a nil erro
 | 
| + // It is probably a directory, try and list it. | 
| + dir := file | 
| + | 
| + names, err := dir.Readdirnames(0) | 
| + if err != nil { | 
| + callback(path, -1, nil, err) | 
| + continue | 
| + } | 
| + walkNoStatInternal(path, names, smallFileLimit, callback) | 
| + callback(path, -1, nil, nil) | 
| + } else { | 
| + // It was actually a file | 
| + if int64(count) == smallFileLimit { | 
| + // This file was bigger than the block size | 
| 
mcgreevy_g
2017/06/27 03:29:17
Or, perhaps it was equal to the block size and io.
 | 
| + callback(path, -1, multiReadCloser{io.MultiReader(bytes.NewReader(block), file), []io.Closer{file}}, nil) | 
| + } else { | 
| + // This file was smaller than the block size | 
| + callback(path, int64(count), ioutil.NopCloser(bytes.NewReader(block[:count])), nil) | 
| + } | 
| + } | 
| + } | 
| +} | 
| + | 
| +// WalkNoStat is an implementation of a directory tree walker which avoids | 
| +// calling stat on every file. | 
| +// | 
| +// File systems have been heavily optimised for doing a directory walk in inode | 
| +// order. It can be an order of magnitude faster to walk the directory in this | 
| +// order so we do. | 
| +// | 
| +// Calling `stat` is also one of the most expensive things you can do (it is | 
| +// roughly equivalent to reading 8/16k of data). Hence, if you have a lot of | 
| +// small files then just reading their contents directly is more efficient. | 
| +// Rather then doing the stat, we assume everything is a file and just try to | 
| +// read a chunk. If the file is smaller than the block size, we know that we | 
| +// have the entire contents. Otherwise we know the file is bigger and can | 
| +// decide to do the stat. If the name turned out to be a directory, then we | 
| +// will get an error. | 
| +func WalkNoStat(root string, smallFileLimit int64, callback WalkFunc) { | 
| + paths := []string{root} | 
| + walkNoStatInternal("", paths, smallFileLimit, callback) | 
| +} |