OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 // Use of this source code is governed under the Apache License, Version 2.0 |
| 3 // that can be found in the LICENSE file. |
| 4 |
| 5 // This function works strangely for performance reasons. |
| 6 // |
| 7 // File systems have been heavily optimised for doing a directory walk in inode |
| 8 // order. It can be an order of magnitude faster to walk the directory in this |
| 9 // order so we do. *However*, we want our output to be sorted so it is |
| 10 // deterministic. |
| 11 // |
| 12 // Calling `stat` is also one of the most expensive things you can do (it is |
| 13 // roughly equivalent to reading 64/128k of data). Hence, if you have a lot of |
| 14 // small files then just reading their contents directly is more efficient. |
| 15 // Rather than doing the stat, we assume everything is a file and just try to |
| 16 // read a chunk. If the file is smaller than the block size, we know that we |
| 17 // have the entire contents. Otherwise we know the file is bigger and can |
| 18 // decide to do the stat. If the name turned out to be a directory, then we |
| 19 // will get an error. |
| 20 |
| 21 package dirwalk |
| 22 |
| 23 import ( |
| 24 "io" |
| 25 "os" |
| 26 "path/filepath" |
| 27 "sort" |
| 28 ) |
| 29 |
// SmallFile pairs a file's name with bytes read from that file.
type SmallFile struct {
	name string // name of the file
	data []byte // bytes read from the file
}

// SmallFileByName adapts a slice of SmallFile to sort.Interface, ordering the
// elements by ascending name.
type SmallFileByName []SmallFile

// Len reports the number of elements in the slice.
func (s SmallFileByName) Len() int {
	return len(s)
}

// Swap exchanges the elements at positions i and j.
func (s SmallFileByName) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the name at position i sorts before the name at j.
func (s SmallFileByName) Less(i, j int) bool {
	left, right := s[i].name, s[j].name
	return left < right
}
| 39 |
| 40 /* |
| 41 type LargeFile struct { |
| 42 name string |
| 43 } |
| 44 type LargeFileByName []LargeFile |
| 45 func (a LargeFileByName) Len() int { return len(a) } |
| 46 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
| 47 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name } |
| 48 */ |
| 49 |
// EntryError pairs the name of an entry with an error associated with it.
type EntryError struct {
	name string // name of the entry
	err  error  // error recorded for the entry
}

// EntryErrorByName adapts a slice of EntryError to sort.Interface, ordering
// the elements by ascending name.
type EntryErrorByName []EntryError

// Len reports the number of elements in the slice.
func (e EntryErrorByName) Len() int {
	return len(e)
}

// Swap exchanges the elements at positions i and j.
func (e EntryErrorByName) Swap(i, j int) {
	e[j], e[i] = e[i], e[j]
}

// Less reports whether the name at position i sorts before the name at j.
func (e EntryErrorByName) Less(i, j int) bool {
	left, right := e[i].name, e[j].name
	return left < right
}
| 59 |
| 60 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs
WalkObserver) { |
| 61 var errors []EntryError |
| 62 var smallfiles []SmallFile |
| 63 var largefiles []string |
| 64 var dirs []EntryError |
| 65 |
| 66 for _, name := range files { |
| 67 fname := filepath.Join(base, name) |
| 68 file, err := os.Open(fname) |
| 69 |
| 70 if err != nil { |
| 71 errors = append(errors, EntryError{fname, err}) |
| 72 continue |
| 73 } |
| 74 |
| 75 block := make([]byte, smallfile_limit) |
| 76 count, err := file.Read(block) |
| 77 if err != io.EOF && err != nil { |
| 78 // Its probably a directory |
| 79 dirs = append(dirs, EntryError{fname, err}) |
| 80 continue |
| 81 } |
| 82 |
| 83 // This file was bigger than the block size, stat it |
| 84 if int64(count) == smallfile_limit { |
| 85 /* |
| 86 stat, err := file.Stat() |
| 87 if err != nil { |
| 88 errors = append(errors, EntryError{fname
, err}) |
| 89 continue |
| 90 } |
| 91 */ |
| 92 largefiles = append(largefiles, fname) //LargeFile{name:
fname, stat: &stat}) |
| 93 |
| 94 // This file was smaller than the block size |
| 95 } else { |
| 96 smallfiles = append(smallfiles, SmallFile{name: fname, d
ata: block[:count]}) |
| 97 } |
| 98 file.Close() |
| 99 } |
| 100 |
| 101 sort.Sort(SmallFileByName(smallfiles)) |
| 102 for _, f := range smallfiles { |
| 103 obs.SmallFile(f.name, f.data) |
| 104 } |
| 105 |
| 106 sort.Strings(largefiles) |
| 107 for _, fname := range largefiles { |
| 108 obs.LargeFile(fname) |
| 109 } |
| 110 |
| 111 sort.Sort(EntryErrorByName(dirs)) |
| 112 for _, d := range dirs { |
| 113 file, err := os.Open(d.name) |
| 114 if err != nil { |
| 115 obs.Error(d.name, d.err) |
| 116 continue |
| 117 } |
| 118 |
| 119 names, err := file.Readdirnames(0) |
| 120 if err != nil { |
| 121 obs.Error(d.name, d.err) |
| 122 continue |
| 123 } |
| 124 walkNoStatInternal(d.name, names, smallfile_limit, obs) |
| 125 } |
| 126 } |
| 127 |
| 128 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) { |
| 129 paths := []string{root} |
| 130 walkNoStatInternal("", paths, smallfile_limit, obs) |
| 131 obs.Finished() |
| 132 } |
OLD | NEW |