OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 // Use of this source code is governed under the Apache License, Version 2.0 |
| 3 // that can be found in the LICENSE file. |
| 4 |
| 5 /** |
| 6 |
| 7 This function works strangely for performance reasons; I'll try to explain below. |
| 8 |
| 9 File systems have been heavily optimised for doing a directory walk in inode |
| 10 order. It can be an order of magnitude faster to walk the directory this way. |
| 11 *However*, we want our output to be in sorted order so it is deterministic. |
| 12 |
| 13 Calling `stat` on a file is one of the most expensive things you can do. It is |
| 14 equivalent to reading 64/128k of data. Hence, if you have a lot of small files |
| 15 then just reading their contents directly is more efficient. |
| 16 |
| 17 **/ |
| 18 package dirtools |
| 19 |
| 20 import ( |
| 21 "io" |
| 22 "os" |
| 23 "path/filepath" |
| 24 "sort" |
| 25 ) |
| 26 |
// SmallFile pairs a name with a byte slice of data.
type SmallFile struct {
	name string
	data []byte
}

// SmallFileByName implements sort.Interface, ordering SmallFile values by
// ascending name (plain string comparison).
type SmallFileByName []SmallFile

// Len returns the number of elements in the slice.
func (a SmallFileByName) Len() int { return len(a) }

// Swap exchanges the elements at indices i and j.
func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the name at index i sorts before the name at index j.
func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name }
| 36 |
| 37 /* |
| 38 type LargeFile struct { |
| 39 name string |
| 40 } |
| 41 type LargeFileByName []LargeFile |
| 42 func (a LargeFileByName) Len() int { return len(a) } |
| 43 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
| 44 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name } |
| 45 */ |
| 46 |
// EntryError pairs a name with an error value.
type EntryError struct {
	name string
	err  error
}

// EntryErrorByName implements sort.Interface, ordering EntryError values by
// ascending name (plain string comparison).
type EntryErrorByName []EntryError

// Len returns the number of elements in the slice.
func (a EntryErrorByName) Len() int { return len(a) }

// Swap exchanges the elements at indices i and j.
func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the name at index i sorts before the name at index j.
func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name }
| 56 |
| 57 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs
WalkObserver) { |
| 58 var errors []EntryError |
| 59 var smallfiles []SmallFile |
| 60 var largefiles []string |
| 61 var dirs []EntryError |
| 62 |
| 63 for _, name := range files { |
| 64 fname := filepath.Join(base, name) |
| 65 file, err := os.Open(fname) |
| 66 |
| 67 if err != nil { |
| 68 errors = append(errors, EntryError{fname, err}) |
| 69 continue |
| 70 } |
| 71 |
| 72 block := make([]byte, smallfile_limit) |
| 73 count, err := file.Read(block) |
| 74 if err != io.EOF && err != nil { |
| 75 // Its probably a directory |
| 76 dirs = append(dirs, EntryError{fname, err}) |
| 77 continue |
| 78 } |
| 79 |
| 80 // This file was bigger than the block size, stat it |
| 81 if int64(count) == smallfile_limit { |
| 82 /* |
| 83 stat, err := file.Stat() |
| 84 if err != nil { |
| 85 errors = append(errors, EntryError{fname
, err}) |
| 86 continue |
| 87 } |
| 88 */ |
| 89 largefiles = append(largefiles, fname) //LargeFile{name:
fname, stat: &stat}) |
| 90 |
| 91 // This file was smaller than the block size |
| 92 } else { |
| 93 smallfiles = append(smallfiles, SmallFile{name: fname, d
ata: block[:count]}) |
| 94 } |
| 95 file.Close() |
| 96 } |
| 97 |
| 98 sort.Sort(SmallFileByName(smallfiles)) |
| 99 for _, f := range smallfiles { |
| 100 obs.SmallFile(f.name, f.data) |
| 101 } |
| 102 |
| 103 sort.Strings(largefiles) |
| 104 for _, fname := range largefiles { |
| 105 obs.LargeFile(fname) |
| 106 } |
| 107 |
| 108 sort.Sort(EntryErrorByName(dirs)) |
| 109 for _, d := range dirs { |
| 110 file, err := os.Open(d.name) |
| 111 if err != nil { |
| 112 obs.Error(d.name, d.err) |
| 113 continue |
| 114 } |
| 115 |
| 116 names, err := file.Readdirnames(0) |
| 117 if err != nil { |
| 118 obs.Error(d.name, d.err) |
| 119 continue |
| 120 } |
| 121 walkNoStatInternal(d.name, names, smallfile_limit, obs) |
| 122 } |
| 123 } |
| 124 |
| 125 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) { |
| 126 paths := []string{root} |
| 127 walkNoStatInternal("", paths, smallfile_limit, obs) |
| 128 } |
OLD | NEW |