Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The LUCI Authors. All rights reserved. | |
| 2 // Use of this source code is governed under the Apache License, Version 2.0 | |
| 3 // that can be found in the LICENSE file. | |
| 4 | |
| 5 // This function works strangely for performance reasons. | |
| 6 // | |
| 7 // File systems have been heavily optimised for doing a directory walk in inode | |
| 8 // order. It can be an order of magnitude faster to walk the directory in this | |
| 9 // order so we do. *However*, we want out output to be in sorted so it is | |
| 10 // deterministic. | |
| 11 // | |
| 12 // Calling `stat` is also one of the most expensive things you can do (it is | |
| 13 // roughly equivalent to reading 64/128k of data). Hence, if you have a lot of | |
| 14 // small files then just reading their contents directly is more efficient. | |
| 15 // Rather then doing the stat, we assume everything is a file and just try to | |
| 16 // read a chunk. If the file is smaller than the block size, we know that we | |
| 17 // have the entire contents. Otherwise we know the file is bigger and can | |
| 18 // decide to do the stat. If the name turned out to be a directory, then we | |
| 19 // will get an error. | |
| 20 | |
| 21 package dirwalk | |
| 22 | |
| 23 import ( | |
| 24 "io" | |
| 25 "os" | |
| 26 "path/filepath" | |
| 27 "sort" | |
| 28 ) | |
| 29 | |
| 30 type SmallFile struct { | |
| 31 name string | |
| 32 data []byte | |
| 33 } | |
| 34 type SmallFileByName []SmallFile | |
| 35 | |
| 36 func (a SmallFileByName) Len() int { return len(a) } | |
| 37 func (a SmallFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
| 38 func (a SmallFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
| 39 | |
| 40 /* | |
| 41 type LargeFile struct { | |
| 42 name string | |
| 43 } | |
| 44 type LargeFileByName []LargeFile | |
| 45 func (a LargeFileByName) Len() int { return len(a) } | |
| 46 func (a LargeFileByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
| 47 func (a LargeFileByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
| 48 */ | |
| 49 | |
| 50 type EntryError struct { | |
| 51 name string | |
| 52 err error | |
| 53 } | |
| 54 type EntryErrorByName []EntryError | |
| 55 | |
| 56 func (a EntryErrorByName) Len() int { return len(a) } | |
| 57 func (a EntryErrorByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
| 58 func (a EntryErrorByName) Less(i, j int) bool { return a[i].name < a[j].name } | |
| 59 | |
| 60 func walkNoStatInternal(base string, files []string, smallfile_limit int64, obs WalkObserver) { | |
| 61 var errors []EntryError | |
| 62 var smallfiles []SmallFile | |
| 63 var largefiles []string | |
| 64 var dirs []EntryError | |
| 65 | |
| 66 for _, name := range files { | |
| 67 fname := filepath.Join(base, name) | |
| 68 file, err := os.Open(fname) | |
| 69 | |
| 70 if err != nil { | |
| 71 errors = append(errors, EntryError{fname, err}) | |
| 72 continue | |
| 73 } | |
| 74 | |
| 75 block := make([]byte, smallfile_limit) | |
| 76 count, err := file.Read(block) | |
| 77 if err != io.EOF && err != nil { | |
| 78 // Its probably a directory | |
| 79 dirs = append(dirs, EntryError{fname, err}) | |
| 80 continue | |
| 81 } | |
| 82 | |
| 83 // This file was bigger than the block size, stat it | |
| 84 if int64(count) == smallfile_limit { | |
| 85 /* | |
| 86 stat, err := file.Stat() | |
| 87 if err != nil { | |
| 88 errors = append(errors, EntryError{fname , err}) | |
| 89 continue | |
| 90 } | |
| 91 */ | |
| 92 largefiles = append(largefiles, fname) //LargeFile{name: fname, stat: &stat}) | |
| 93 | |
| 94 // This file was smaller than the block size | |
| 95 } else { | |
| 96 smallfiles = append(smallfiles, SmallFile{name: fname, d ata: block[:count]}) | |
| 97 } | |
| 98 file.Close() | |
| 99 } | |
| 100 | |
| 101 sort.Sort(SmallFileByName(smallfiles)) | |
|
M-A Ruel
2016/09/23 01:48:17
Why pass the file names in order?
| |
| 102 for _, f := range smallfiles { | |
| 103 obs.SmallFile(f.name, f.data) | |
| 104 } | |
| 105 | |
| 106 sort.Strings(largefiles) | |
| 107 for _, fname := range largefiles { | |
| 108 obs.LargeFile(fname) | |
| 109 } | |
| 110 | |
| 111 sort.Sort(EntryErrorByName(dirs)) | |
| 112 for _, d := range dirs { | |
| 113 file, err := os.Open(d.name) | |
| 114 if err != nil { | |
| 115 obs.Error(d.name, d.err) | |
| 116 continue | |
| 117 } | |
| 118 | |
| 119 names, err := file.Readdirnames(0) | |
| 120 if err != nil { | |
| 121 obs.Error(d.name, d.err) | |
| 122 continue | |
| 123 } | |
| 124 walkNoStatInternal(d.name, names, smallfile_limit, obs) | |
| 125 } | |
| 126 } | |
| 127 | |
| 128 func WalkNoStat(root string, smallfile_limit int64, obs WalkObserver) { | |
| 129 paths := []string{root} | |
| 130 walkNoStatInternal("", paths, smallfile_limit, obs) | |
| 131 obs.Finished() | |
| 132 } | |
| OLD | NEW |